diff --git a/CMakeLists.txt b/CMakeLists.txt index 6061b22354..579967a0a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,9 +133,9 @@ else() set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig") endif() -find_package(llvm 21) -find_package(clang 21) -find_package(lld 21) +find_package(llvm 22) +find_package(clang 22) +find_package(lld 22) if(ZIG_STATIC_ZLIB) if (MSVC) diff --git a/build.zig b/build.zig index 3ac840c309..ddbb9c5db0 100644 --- a/build.zig +++ b/build.zig @@ -1206,13 +1206,16 @@ const zig_cpp_sources = [_][]const u8{ const clang_libs = [_][]const u8{ "clangFrontendTool", "clangCodeGen", - "clangFrontend", - "clangDriver", - "clangSerialization", - "clangSema", "clangStaticAnalyzerFrontend", "clangStaticAnalyzerCheckers", "clangStaticAnalyzerCore", + "clangCrossTU", + "clangFrontend", + "clangDriver", + "clangOptions", + "clangSerialization", + "clangSema", + "clangAnalysisLifetimeSafety", "clangAnalysis", "clangASTMatchers", "clangAST", @@ -1224,8 +1227,9 @@ const clang_libs = [_][]const u8{ "clangLex", "clangRewriteFrontend", "clangRewrite", - "clangCrossTU", "clangIndex", + "clangFormat", + "clangToolingInclusions", "clangToolingCore", "clangExtractAPI", "clangSupport", @@ -1373,11 +1377,12 @@ const llvm_libs = [_][]const u8{ "LLVMObjCopy", "LLVMMCA", "LLVMMCDisassembler", + "LLVMDTLTO", "LLVMLTO", "LLVMFrontendOpenACC", - "LLVMFrontendHLSL", "LLVMFrontendDriver", "LLVMExtensions", + "LLVMPlugins", "LLVMPasses", "LLVMHipStdPar", "LLVMCoroutines", @@ -1404,6 +1409,7 @@ const llvm_libs = [_][]const u8{ "LLVMObjCARCOpts", "LLVMCodeGenTypes", "LLVMCGData", + "LLVMCAS", "LLVMIRPrinter", "LLVMInterfaceStub", "LLVMFileCheck", @@ -1422,15 +1428,17 @@ const llvm_libs = [_][]const u8{ "LLVMDebugInfoCodeView", "LLVMDebugInfoGSYM", "LLVMDebugInfoDWARF", - "LLVMDebugInfoDWARFLowLevel", "LLVMObject", "LLVMTextAPI", "LLVMMCParser", "LLVMIRReader", "LLVMAsmParser", "LLVMMC", + "LLVMDebugInfoDWARFLowLevel", "LLVMBitReader", + 
"LLVMFrontendHLSL", "LLVMFuzzerCLI", + "LLVMABI", "LLVMCore", "LLVMRemarks", "LLVMBitstreamReader", diff --git a/ci/aarch64-freebsd-debug.sh b/ci/aarch64-freebsd-debug.sh index 2f0ebdc723..ad852e4601 100755 --- a/ci/aarch64-freebsd-debug.sh +++ b/ci/aarch64-freebsd-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="aarch64-freebsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-freebsd-release.sh b/ci/aarch64-freebsd-release.sh index 4f12e8367b..3db9e9a21d 100755 --- a/ci/aarch64-freebsd-release.sh +++ b/ci/aarch64-freebsd-release.sh @@ -7,7 +7,7 @@ set -e TARGET="aarch64-freebsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-linux-debug.sh b/ci/aarch64-linux-debug.sh index 7a4a6daa2a..5ee0a33c1c 100755 --- a/ci/aarch64-linux-debug.sh +++ b/ci/aarch64-linux-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="aarch64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-linux-release.sh b/ci/aarch64-linux-release.sh index 39ad9767ab..bcdeade117 100755 --- a/ci/aarch64-linux-release.sh +++ b/ci/aarch64-linux-release.sh @@ -7,7 +7,7 @@ set -e TARGET="aarch64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-macos-debug.sh b/ci/aarch64-macos-debug.sh index 3a8a6c6484..9592a825de 100755 --- 
a/ci/aarch64-macos-debug.sh +++ b/ci/aarch64-macos-debug.sh @@ -8,7 +8,7 @@ set -e ZIGDIR="$PWD" TARGET="aarch64-macos-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-macos-release.sh b/ci/aarch64-macos-release.sh index 4c4c240786..dc9837012c 100755 --- a/ci/aarch64-macos-release.sh +++ b/ci/aarch64-macos-release.sh @@ -8,7 +8,7 @@ set -e ZIGDIR="$PWD" TARGET="aarch64-macos-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-netbsd-debug.sh b/ci/aarch64-netbsd-debug.sh index 4f5eb0d410..3445cd6526 100755 --- a/ci/aarch64-netbsd-debug.sh +++ b/ci/aarch64-netbsd-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="aarch64-netbsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-netbsd-release.sh b/ci/aarch64-netbsd-release.sh index d9d9477904..e866720e30 100755 --- a/ci/aarch64-netbsd-release.sh +++ b/ci/aarch64-netbsd-release.sh @@ -7,7 +7,7 @@ set -e TARGET="aarch64-netbsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-windows.ps1 b/ci/aarch64-windows.ps1 index 96e0764256..c711127a61 100644 --- a/ci/aarch64-windows.ps1 +++ b/ci/aarch64-windows.ps1 @@ -1,5 +1,5 @@ $TARGET = "aarch64-windows-gnu" -$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +$ZIG_LLVM_CLANG_LLD_NAME = 
"zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" $MCPU = "baseline" $ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip" $PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME" diff --git a/ci/loongarch64-linux-debug.sh b/ci/loongarch64-linux-debug.sh index 4cba17b031..a31239d41a 100755 --- a/ci/loongarch64-linux-debug.sh +++ b/ci/loongarch64-linux-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="loongarch64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.157+7fdd60df1" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/loongarch64-linux-release.sh b/ci/loongarch64-linux-release.sh index 5b05284d26..5163b07fde 100755 --- a/ci/loongarch64-linux-release.sh +++ b/ci/loongarch64-linux-release.sh @@ -7,7 +7,7 @@ set -e TARGET="loongarch64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.157+7fdd60df1" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/powerpc64le-linux-debug.sh b/ci/powerpc64le-linux-debug.sh index 1b9a51e44d..5950a5cd8b 100755 --- a/ci/powerpc64le-linux-debug.sh +++ b/ci/powerpc64le-linux-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="powerpc64le-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1594+9fa433d71" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/powerpc64le-linux-release.sh b/ci/powerpc64le-linux-release.sh index 77e1ca803a..2b911a9aef 100755 --- a/ci/powerpc64le-linux-release.sh +++ b/ci/powerpc64le-linux-release.sh @@ -7,7 +7,7 @@ set -e TARGET="powerpc64le-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1594+9fa433d71" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" 
PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/riscv64-linux-debug.sh b/ci/riscv64-linux-debug.sh index 6eace21297..631573cfec 100755 --- a/ci/riscv64-linux-debug.sh +++ b/ci/riscv64-linux-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="riscv64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-riscv64-linux-musl-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/riscv64-linux-release.sh b/ci/riscv64-linux-release.sh index c3d28ee5e6..1f51b7d8c2 100755 --- a/ci/riscv64-linux-release.sh +++ b/ci/riscv64-linux-release.sh @@ -7,7 +7,7 @@ set -e TARGET="riscv64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-riscv64-linux-musl-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/s390x-linux-debug.sh b/ci/s390x-linux-debug.sh index ffe4d0f02b..c66717b460 100755 --- a/ci/s390x-linux-debug.sh +++ b/ci/s390x-linux-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="s390x-linux-musl" MCPU="z15" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1354+94e98bfe8" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/s390x-linux-release.sh b/ci/s390x-linux-release.sh index 7fb6cd3641..ed050d8148 100755 --- a/ci/s390x-linux-release.sh +++ b/ci/s390x-linux-release.sh @@ -7,7 +7,7 @@ set -e TARGET="s390x-linux-musl" MCPU="z15" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1354+94e98bfe8" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-freebsd-debug.sh b/ci/x86_64-freebsd-debug.sh index 21839b6460..911bd9ecc1 100755 --- a/ci/x86_64-freebsd-debug.sh +++ b/ci/x86_64-freebsd-debug.sh @@ -7,7 +7,7 @@ set -e 
TARGET="x86_64-freebsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.312+164c598cd" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-freebsd-release.sh b/ci/x86_64-freebsd-release.sh index 94c33537a4..114f63bf86 100755 --- a/ci/x86_64-freebsd-release.sh +++ b/ci/x86_64-freebsd-release.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-freebsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.312+164c598cd" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-linux-debug-llvm.sh b/ci/x86_64-linux-debug-llvm.sh index 9140063f1b..661c457ba1 100755 --- a/ci/x86_64-linux-debug-llvm.sh +++ b/ci/x86_64-linux-debug-llvm.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-linux-debug.sh b/ci/x86_64-linux-debug.sh index 90d9ff1797..5e15672668 100755 --- a/ci/x86_64-linux-debug.sh +++ b/ci/x86_64-linux-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-linux-release.sh b/ci/x86_64-linux-release.sh index bb1866d456..4c82a88eac 100755 --- a/ci/x86_64-linux-release.sh +++ b/ci/x86_64-linux-release.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" 
diff --git a/ci/x86_64-netbsd-debug.sh b/ci/x86_64-netbsd-debug.sh index 68e9081f3b..5328d0ea77 100755 --- a/ci/x86_64-netbsd-debug.sh +++ b/ci/x86_64-netbsd-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-netbsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-netbsd-release.sh b/ci/x86_64-netbsd-release.sh index 225a527686..6fe3302c93 100755 --- a/ci/x86_64-netbsd-release.sh +++ b/ci/x86_64-netbsd-release.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-netbsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-openbsd-debug.sh b/ci/x86_64-openbsd-debug.sh index 50066305af..fe4e6ade53 100755 --- a/ci/x86_64-openbsd-debug.sh +++ b/ci/x86_64-openbsd-debug.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-openbsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2051+28b83e3b0" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-openbsd-release.sh b/ci/x86_64-openbsd-release.sh index ab5162fc60..d340ae8581 100755 --- a/ci/x86_64-openbsd-release.sh +++ b/ci/x86_64-openbsd-release.sh @@ -7,7 +7,7 @@ set -e TARGET="x86_64-openbsd-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2051+28b83e3b0" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-windows-debug.ps1 b/ci/x86_64-windows-debug.ps1 index 01cd5be244..a00f6812e0 100644 --- a/ci/x86_64-windows-debug.ps1 +++ b/ci/x86_64-windows-debug.ps1 @@ -1,6 +1,6 @@ $TARGET = "x86_64-windows-gnu" $MCPU = "baseline" 
-$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" $ZIG = "$PREFIX_PATH\bin\zig.exe" $ZIG_LIB_DIR = "$(Get-Location)\lib" $ZSF_MAX_RSS = if ($Env:ZSF_MAX_RSS) { $Env:ZSF_MAX_RSS } else { 0 } diff --git a/ci/x86_64-windows-release.ps1 b/ci/x86_64-windows-release.ps1 index a16e2ddea5..181474a290 100644 --- a/ci/x86_64-windows-release.ps1 +++ b/ci/x86_64-windows-release.ps1 @@ -1,6 +1,6 @@ $TARGET = "x86_64-windows-gnu" $MCPU = "baseline" -$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31" +$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523" $ZIG = "$PREFIX_PATH\bin\zig.exe" $ZIG_LIB_DIR = "$(Get-Location)\lib" $ZSF_MAX_RSS = if ($Env:ZSF_MAX_RSS) { $Env:ZSF_MAX_RSS } else { 0 } diff --git a/cmake/Findclang.cmake b/cmake/Findclang.cmake index 4b7363da9c..b34c9ce57f 100644 --- a/cmake/Findclang.cmake +++ b/cmake/Findclang.cmake @@ -17,10 +17,10 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h if(${LLVM_LINK_MODE} STREQUAL "shared") find_library(CLANG_LIBRARIES NAMES - libclang-cpp.so.21 - libclang-cpp.so.21.1 - clang-cpp-21.0 - clang-cpp210 + libclang-cpp.so.22 + libclang-cpp.so.22.1 + clang-cpp-22.0 + clang-cpp220 clang-cpp NAMES_PER_DIR HINTS "${LLVM_LIBDIRS}" @@ -44,13 +44,16 @@ else() FIND_AND_ADD_CLANG_LIB(clangFrontendTool) FIND_AND_ADD_CLANG_LIB(clangCodeGen) - FIND_AND_ADD_CLANG_LIB(clangFrontend) - FIND_AND_ADD_CLANG_LIB(clangDriver) - FIND_AND_ADD_CLANG_LIB(clangSerialization) - FIND_AND_ADD_CLANG_LIB(clangSema) FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerFrontend) FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCheckers) FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCore) + FIND_AND_ADD_CLANG_LIB(clangCrossTU) + FIND_AND_ADD_CLANG_LIB(clangFrontend) + FIND_AND_ADD_CLANG_LIB(clangDriver) + FIND_AND_ADD_CLANG_LIB(clangOptions) + 
FIND_AND_ADD_CLANG_LIB(clangSerialization) + FIND_AND_ADD_CLANG_LIB(clangSema) + FIND_AND_ADD_CLANG_LIB(clangAnalysisLifetimeSafety) FIND_AND_ADD_CLANG_LIB(clangAnalysis) FIND_AND_ADD_CLANG_LIB(clangASTMatchers) FIND_AND_ADD_CLANG_LIB(clangAST) @@ -62,8 +65,9 @@ else() FIND_AND_ADD_CLANG_LIB(clangLex) FIND_AND_ADD_CLANG_LIB(clangRewriteFrontend) FIND_AND_ADD_CLANG_LIB(clangRewrite) - FIND_AND_ADD_CLANG_LIB(clangCrossTU) FIND_AND_ADD_CLANG_LIB(clangIndex) + FIND_AND_ADD_CLANG_LIB(clangFormat) + FIND_AND_ADD_CLANG_LIB(clangToolingInclusions) FIND_AND_ADD_CLANG_LIB(clangToolingCore) FIND_AND_ADD_CLANG_LIB(clangExtractAPI) FIND_AND_ADD_CLANG_LIB(clangSupport) diff --git a/cmake/Findlld.cmake b/cmake/Findlld.cmake index 61cf1cd883..5a6e6f4680 100644 --- a/cmake/Findlld.cmake +++ b/cmake/Findlld.cmake @@ -9,23 +9,23 @@ find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h HINTS ${LLVM_INCLUDE_DIRS} PATHS - /usr/lib/llvm-21/include - /usr/local/llvm210/include - /usr/local/llvm21/include - /usr/local/opt/lld@21/include - /opt/homebrew/opt/lld@21/include - /home/linuxbrew/.linuxbrew/opt/lld@21/include + /usr/lib/llvm-22/include + /usr/local/llvm220/include + /usr/local/llvm22/include + /usr/local/opt/lld@22/include + /opt/homebrew/opt/lld@22/include + /home/linuxbrew/.linuxbrew/opt/lld@22/include /mingw64/include) -find_library(LLD_LIBRARY NAMES lld-21.0 lld210 lld NAMES_PER_DIR +find_library(LLD_LIBRARY NAMES lld-22.0 lld220 lld NAMES_PER_DIR HINTS ${LLVM_LIBDIRS} PATHS - /usr/lib/llvm-21/lib - /usr/local/llvm210/lib - /usr/local/llvm21/lib - /usr/local/opt/lld@21/lib - /opt/homebrew/opt/lld@21/lib - /home/linuxbrew/.linuxbrew/opt/lld@21/lib + /usr/lib/llvm-22/lib + /usr/local/llvm220/lib + /usr/local/llvm22/lib + /usr/local/opt/lld@22/lib + /opt/homebrew/opt/lld@22/lib + /home/linuxbrew/.linuxbrew/opt/lld@22/lib ) if(EXISTS ${LLD_LIBRARY}) set(LLD_LIBRARIES ${LLD_LIBRARY}) @@ -36,12 +36,12 @@ else() HINTS ${LLVM_LIBDIRS} PATHS ${LLD_LIBDIRS} - /usr/lib/llvm-21/lib - 
/usr/local/llvm210/lib - /usr/local/llvm21/lib - /usr/local/opt/lld@21/lib - /opt/homebrew/opt/lld@21/lib - /home/linuxbrew/.linuxbrew/opt/lld@21/lib + /usr/lib/llvm-22/lib + /usr/local/llvm220/lib + /usr/local/llvm22/lib + /usr/local/opt/lld@22/lib + /opt/homebrew/opt/lld@22/lib + /home/linuxbrew/.linuxbrew/opt/lld@22/lib /mingw64/lib /c/msys64/mingw64/lib c:/msys64/mingw64/lib) diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake index 0c08d4f0ac..7c3b347685 100644 --- a/cmake/Findllvm.cmake +++ b/cmake/Findllvm.cmake @@ -17,12 +17,12 @@ if(ZIG_USE_LLVM_CONFIG) # terminate when the right LLVM version is not found. unset(LLVM_CONFIG_EXE CACHE) find_program(LLVM_CONFIG_EXE - NAMES llvm-config-21 llvm-config-21.0 llvm-config210 llvm-config21 llvm-config NAMES_PER_DIR + NAMES llvm-config-22 llvm-config-22.0 llvm-config220 llvm-config22 llvm-config NAMES_PER_DIR PATHS "/mingw64/bin" "/c/msys64/mingw64/bin" "c:/msys64/mingw64/bin" - "C:/Libraries/llvm-21.0.0/bin") + "C:/Libraries/llvm-22.0.0/bin") if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND") if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "") @@ -40,9 +40,9 @@ if(ZIG_USE_LLVM_CONFIG) OUTPUT_STRIP_TRAILING_WHITESPACE) get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY) - if("${LLVM_CONFIG_VERSION}" VERSION_LESS 21 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 22 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 22) + if("${LLVM_CONFIG_VERSION}" VERSION_LESS 22 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 23 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 23) # Save the error message, in case this is the last llvm-config we find - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 21.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 22.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") # Ignore this directory and try the search again list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}") @@ -66,9 +66,9 @@ 
if(ZIG_USE_LLVM_CONFIG) if (LLVM_CONFIG_ERROR) # Save the error message, in case this is the last llvm-config we find if (ZIG_SHARED_LLVM) - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 21.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 22.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library") else() - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 21.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 22.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library") endif() # Ignore this directory and try the search again @@ -321,11 +321,12 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMObjCopy) FIND_AND_ADD_LLVM_LIB(LLVMMCA) FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler) + FIND_AND_ADD_LLVM_LIB(LLVMDTLTO) FIND_AND_ADD_LLVM_LIB(LLVMLTO) FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC) - FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL) FIND_AND_ADD_LLVM_LIB(LLVMFrontendDriver) FIND_AND_ADD_LLVM_LIB(LLVMExtensions) + FIND_AND_ADD_LLVM_LIB(LLVMPlugins) FIND_AND_ADD_LLVM_LIB(LLVMPasses) FIND_AND_ADD_LLVM_LIB(LLVMHipStdPar) FIND_AND_ADD_LLVM_LIB(LLVMCoroutines) @@ -352,6 +353,7 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMObjCARCOpts) FIND_AND_ADD_LLVM_LIB(LLVMCodeGenTypes) FIND_AND_ADD_LLVM_LIB(LLVMCGData) + FIND_AND_ADD_LLVM_LIB(LLVMCAS) FIND_AND_ADD_LLVM_LIB(LLVMIRPrinter) FIND_AND_ADD_LLVM_LIB(LLVMInterfaceStub) FIND_AND_ADD_LLVM_LIB(LLVMFileCheck) @@ -370,15 +372,17 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoGSYM) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARF) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARFLowLevel) FIND_AND_ADD_LLVM_LIB(LLVMObject) FIND_AND_ADD_LLVM_LIB(LLVMTextAPI) FIND_AND_ADD_LLVM_LIB(LLVMMCParser) FIND_AND_ADD_LLVM_LIB(LLVMIRReader) FIND_AND_ADD_LLVM_LIB(LLVMAsmParser) FIND_AND_ADD_LLVM_LIB(LLVMMC) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARFLowLevel) 
FIND_AND_ADD_LLVM_LIB(LLVMBitReader) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL) FIND_AND_ADD_LLVM_LIB(LLVMFuzzerCLI) + FIND_AND_ADD_LLVM_LIB(LLVMABI) FIND_AND_ADD_LLVM_LIB(LLVMCore) FIND_AND_ADD_LLVM_LIB(LLVMRemarks) FIND_AND_ADD_LLVM_LIB(LLVMBitstreamReader) diff --git a/lib/compiler/aro/aro/Compilation.zig b/lib/compiler/aro/aro/Compilation.zig index e4d2f5e467..bf3670a033 100644 --- a/lib/compiler/aro/aro/Compilation.zig +++ b/lib/compiler/aro/aro/Compilation.zig @@ -498,7 +498,6 @@ fn generateSystemDefines(comp: *Compilation, w: *Io.Writer) !void { .{ .fma, "__FMA__" }, .{ .f16c, "__F16C__" }, .{ .gfni, "__GFNI__" }, - .{ .evex512, "__EVEX512__" }, .{ .avx10_1, "__AVX10_1__" }, .{ .avx10_1, "__AVX10_1_512__" }, @@ -560,7 +559,6 @@ fn generateSystemDefines(comp: *Compilation, w: *Io.Writer) !void { .{ .amx_complex, "__AMX_COMPLEX__" }, .{ .amx_fp8, "__AMX_FP8__" }, .{ .amx_movrs, "__AMX_MOVRS__" }, - .{ .amx_transpose, "__AMX_TRANSPOSE__" }, .{ .amx_avx512, "__AMX_AVX512__" }, .{ .amx_tf32, "__AMX_TF32__" }, .{ .cmpccxadd, "__CMPCCXADD__" }, @@ -798,7 +796,6 @@ fn generateSystemDefines(comp: *Compilation, w: *Io.Writer) !void { .{ .fullfp16, "FP16_SCALAR_ARITHMETIC" }, .{ .dotprod, "DOTPROD" }, .{ .mte, "MEMORY_TAGGING" }, - .{ .tme, "TME" }, .{ .i8mm, "MATMUL_INT8" }, .{ .lse, "ATOMICS" }, .{ .f64mm, "SVE_MATMUL_FP64" }, diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index 501d35f0a2..9e62f047bb 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -447,29 +447,8 @@ pub const gnu_f16_abi = switch (builtin.cpu.arch) { pub const want_sparc_abi = builtin.cpu.arch.isSPARC(); -/// This seems to mostly correspond to `clang::TargetInfo::HasFloat16`. 
pub fn F16T(comptime OtherType: type) type { return switch (builtin.cpu.arch) { - .amdgcn, - .arm, - .armeb, - .thumb, - .thumbeb, - .aarch64, - .aarch64_be, - .hexagon, - .loongarch32, - .loongarch64, - .nvptx, - .nvptx64, - .riscv32, - .riscv32be, - .riscv64, - .riscv64be, - .s390x, - .spirv32, - .spirv64, - => f16, .x86, .x86_64 => if (builtin.target.os.tag.isDarwin()) switch (OtherType) { // Starting with LLVM 16, Darwin uses different abi for f16 // depending on the type of the other return/argument..??? @@ -477,7 +456,7 @@ pub fn F16T(comptime OtherType: type) type { f80, f128 => f16, else => unreachable, } else f16, - else => u16, + else => f16, }; } diff --git a/lib/compiler_rt/trunctfhf2.zig b/lib/compiler_rt/trunctfhf2.zig index 46c6e34ec9..5af87f9c12 100644 --- a/lib/compiler_rt/trunctfhf2.zig +++ b/lib/compiler_rt/trunctfhf2.zig @@ -4,6 +4,9 @@ const truncf = @import("./truncf.zig").truncf; comptime { symbol(&__trunctfhf2, "__trunctfhf2"); + if (compiler_rt.want_ppc_abi) { + symbol(&__trunctfhf2, "__trunckfhf2"); + } } pub fn __trunctfhf2(a: f128) callconv(.c) compiler_rt.F16T(f128) { diff --git a/lib/include/__clang_spirv_builtins.h b/lib/include/__clang_spirv_builtins.h index 9915cdfcae..9c7215f506 100644 --- a/lib/include/__clang_spirv_builtins.h +++ b/lib/include/__clang_spirv_builtins.h @@ -52,30 +52,30 @@ // Builtin IDs and sizes extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_workgroups) __size_t - __spirv_NumWorkgroups(int); + __spirv_BuiltInNumWorkgroups(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_size) __size_t - __spirv_WorkgroupSize(int); + __spirv_BuiltInWorkgroupSize(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_id) __size_t - __spirv_WorkgroupId(int); + __spirv_BuiltInWorkgroupId(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_local_invocation_id) __size_t - __spirv_LocalInvocationId(int); + __spirv_BuiltInLocalInvocationId(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_invocation_id) 
__size_t - __spirv_GlobalInvocationId(int); + __spirv_BuiltInGlobalInvocationId(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_size) __size_t - __spirv_GlobalSize(int); + __spirv_BuiltInGlobalSize(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_offset) __size_t - __spirv_GlobalOffset(int); + __spirv_BuiltInGlobalOffset(int); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_size) __uint32_t - __spirv_SubgroupSize(); + __spirv_BuiltInSubgroupSize(); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_max_size) __uint32_t - __spirv_SubgroupMaxSize(); + __spirv_BuiltInSubgroupMaxSize(); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_subgroups) __uint32_t - __spirv_NumSubgroups(); + __spirv_BuiltInNumSubgroups(); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_id) __uint32_t - __spirv_SubgroupId(); + __spirv_BuiltInSubgroupId(); extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_local_invocation_id) - __uint32_t __spirv_SubgroupLocalInvocationId(); + __uint32_t __spirv_BuiltInSubgroupLocalInvocationId(); // OpGenericCastToPtrExplicit diff --git a/lib/include/__float_float.h b/lib/include/__float_float.h new file mode 100644 index 0000000000..267c0721a7 --- /dev/null +++ b/lib/include/__float_float.h @@ -0,0 +1,176 @@ +/*===---- __float_float.h --------------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_FLOAT_FLOAT_H +#define __CLANG_FLOAT_FLOAT_H + +#if (defined(__MINGW32__) || defined(_MSC_VER) || defined(_AIX)) && \ + __STDC_HOSTED__ + +/* Undefine anything that we'll be redefining below. 
*/ +# undef FLT_EVAL_METHOD +# undef FLT_ROUNDS +# undef FLT_RADIX +# undef FLT_MANT_DIG +# undef DBL_MANT_DIG +# undef LDBL_MANT_DIG +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ + !defined(__STRICT_ANSI__) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) +# undef DECIMAL_DIG +# endif +# undef FLT_DIG +# undef DBL_DIG +# undef LDBL_DIG +# undef FLT_MIN_EXP +# undef DBL_MIN_EXP +# undef LDBL_MIN_EXP +# undef FLT_MIN_10_EXP +# undef DBL_MIN_10_EXP +# undef LDBL_MIN_10_EXP +# undef FLT_MAX_EXP +# undef DBL_MAX_EXP +# undef LDBL_MAX_EXP +# undef FLT_MAX_10_EXP +# undef DBL_MAX_10_EXP +# undef LDBL_MAX_10_EXP +# undef FLT_MAX +# undef DBL_MAX +# undef LDBL_MAX +# undef FLT_EPSILON +# undef DBL_EPSILON +# undef LDBL_EPSILON +# undef FLT_MIN +# undef DBL_MIN +# undef LDBL_MIN +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ + !defined(__STRICT_ANSI__) || \ + (defined(__cplusplus) && __cplusplus >= 201703L) || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) +# undef FLT_TRUE_MIN +# undef DBL_TRUE_MIN +# undef LDBL_TRUE_MIN +# undef FLT_DECIMAL_DIG +# undef DBL_DECIMAL_DIG +# undef LDBL_DECIMAL_DIG +# undef FLT_HAS_SUBNORM +# undef DBL_HAS_SUBNORM +# undef LDBL_HAS_SUBNORM +# endif +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ + !defined(__STRICT_ANSI__) +# undef FLT_NORM_MAX +# undef DBL_NORM_MAX +# undef LDBL_NORM_MAX +#endif +#endif + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ + !defined(__STRICT_ANSI__) +# undef FLT_SNAN +# undef DBL_SNAN +# undef LDBL_SNAN +#endif + +/* Characteristics of floating point types, C99 5.2.4.2.2 */ + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +#endif +#define FLT_ROUNDS (__builtin_flt_rounds()) +#define FLT_RADIX __FLT_RADIX__ + +#define 
FLT_MANT_DIG __FLT_MANT_DIG__ +#define DBL_MANT_DIG __DBL_MANT_DIG__ +#define LDBL_MANT_DIG __LDBL_MANT_DIG__ + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ + !defined(__STRICT_ANSI__) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) +# define DECIMAL_DIG __DECIMAL_DIG__ +#endif + +#define FLT_DIG __FLT_DIG__ +#define DBL_DIG __DBL_DIG__ +#define LDBL_DIG __LDBL_DIG__ + +#define FLT_MIN_EXP __FLT_MIN_EXP__ +#define DBL_MIN_EXP __DBL_MIN_EXP__ +#define LDBL_MIN_EXP __LDBL_MIN_EXP__ + +#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ +#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ +#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ + +#define FLT_MAX_EXP __FLT_MAX_EXP__ +#define DBL_MAX_EXP __DBL_MAX_EXP__ +#define LDBL_MAX_EXP __LDBL_MAX_EXP__ + +#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ +#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ +#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ + +#define FLT_MAX __FLT_MAX__ +#define DBL_MAX __DBL_MAX__ +#define LDBL_MAX __LDBL_MAX__ + +#define FLT_EPSILON __FLT_EPSILON__ +#define DBL_EPSILON __DBL_EPSILON__ +#define LDBL_EPSILON __LDBL_EPSILON__ + +#define FLT_MIN __FLT_MIN__ +#define DBL_MIN __DBL_MIN__ +#define LDBL_MIN __LDBL_MIN__ + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ + !defined(__STRICT_ANSI__) || \ + (defined(__cplusplus) && __cplusplus >= 201703L) || \ + (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) +# define FLT_TRUE_MIN __FLT_DENORM_MIN__ +# define DBL_TRUE_MIN __DBL_DENORM_MIN__ +# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ +# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ +# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ +# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ +# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ +# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ +# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ +#endif + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ + !defined(__STRICT_ANSI__) + /* 
C23 5.2.5.3.2p28 */ +# define FLT_SNAN (__builtin_nansf("")) +# define DBL_SNAN (__builtin_nans("")) +# define LDBL_SNAN (__builtin_nansl("")) + + /* C23 5.2.5.3.3p32 */ +# define FLT_NORM_MAX __FLT_NORM_MAX__ +# define DBL_NORM_MAX __DBL_NORM_MAX__ +# define LDBL_NORM_MAX __LDBL_NORM_MAX__ +#endif + +#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ +# define FLT16_MANT_DIG __FLT16_MANT_DIG__ +# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__ +# define FLT16_DIG __FLT16_DIG__ +# define FLT16_MIN_EXP __FLT16_MIN_EXP__ +# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__ +# define FLT16_MAX_EXP __FLT16_MAX_EXP__ +# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__ +# define FLT16_MAX __FLT16_MAX__ +# define FLT16_EPSILON __FLT16_EPSILON__ +# define FLT16_MIN __FLT16_MIN__ +# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__ +#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */ + +#endif /* __CLANG_FLOAT_FLOAT_H */ diff --git a/lib/include/__float_header_macro.h b/lib/include/__float_header_macro.h new file mode 100644 index 0000000000..11b270e90d --- /dev/null +++ b/lib/include/__float_header_macro.h @@ -0,0 +1,12 @@ +/*===---- __float_header_macro.h -------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_FLOAT_H +#define __CLANG_FLOAT_H +#endif /* __CLANG_FLOAT_H */ diff --git a/lib/include/__float_infinity_nan.h b/lib/include/__float_infinity_nan.h new file mode 100644 index 0000000000..7e253d0bc5 --- /dev/null +++ b/lib/include/__float_infinity_nan.h @@ -0,0 +1,20 @@ +/*===---- __float_infinity_nan.h -------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_FLOAT_INFINITY_NAN_H +#define __CLANG_FLOAT_INFINITY_NAN_H + +/* C23 5.2.5.3.3p29-30 */ +#undef INFINITY +#undef NAN + +#define INFINITY (__builtin_inff()) +#define NAN (__builtin_nanf("")) + +#endif /* __CLANG_FLOAT_INFINITY_NAN_H */ diff --git a/lib/include/amo.h b/lib/include/amo.h new file mode 100644 index 0000000000..97eff35e9c --- /dev/null +++ b/lib/include/amo.h @@ -0,0 +1,131 @@ +/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\ + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * +\*===----------------------------------------------------------------------===*/ + +/* This header provides compatibility for GCC's AMO functions. + * The functions here call Clang's underlying AMO builtins. 
+ */ + +#ifndef _AMO_H +#define _AMO_H + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* AMO Load Operation Codes (FC values) */ +enum { + _AMO_LD_ADD = 0x00, /* Fetch and Add */ + _AMO_LD_XOR = 0x01, /* Fetch and XOR */ + _AMO_LD_IOR = 0x02, /* Fetch and OR */ + _AMO_LD_AND = 0x03, /* Fetch and AND */ + _AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */ + _AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */ + _AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */ + _AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */ + _AMO_LD_SWAP = 0x08 /* Swap */ +}; + +/* 32-bit unsigned AMO load operations */ +static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD); +} + +static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR); +} + +static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR); +} + +static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_AND); +} + +static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX); +} + +static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN); +} + +static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) { + return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP); +} + +/* 32-bit signed AMO load operations */ +static inline int32_t amo_lwat_sadd(int32_t *ptr, int32_t val) { + return __builtin_amo_lwat_s(ptr, val, _AMO_LD_ADD); +} + +static inline int32_t amo_lwat_smax(int32_t *ptr, int32_t val) { + return __builtin_amo_lwat_s(ptr, val, _AMO_LD_SMAX); +} + +static inline int32_t amo_lwat_smin(int32_t *ptr, int32_t val) { + return __builtin_amo_lwat_s(ptr, val, _AMO_LD_SMIN); +} + +static inline int32_t amo_lwat_sswap(int32_t *ptr, int32_t val) { + return
__builtin_amo_lwat_s(ptr, val, _AMO_LD_SWAP); +} + +/* 64-bit unsigned AMO load operations */ +static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD); +} + +static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR); +} + +static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR); +} + +static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_AND); +} + +static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX); +} + +static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN); +} + +static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) { + return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP); +} + +/* 64-bit signed AMO load operations */ +static inline int64_t amo_ldat_sadd(int64_t *ptr, int64_t val) { + return __builtin_amo_ldat_s(ptr, val, _AMO_LD_ADD); +} + +static inline int64_t amo_ldat_smax(int64_t *ptr, int64_t val) { + return __builtin_amo_ldat_s(ptr, val, _AMO_LD_SMAX); +} + +static inline int64_t amo_ldat_smin(int64_t *ptr, int64_t val) { + return __builtin_amo_ldat_s(ptr, val, _AMO_LD_SMIN); +} + +static inline int64_t amo_ldat_sswap(int64_t *ptr, int64_t val) { + return __builtin_amo_ldat_s(ptr, val, _AMO_LD_SWAP); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _AMO_H */ diff --git a/lib/include/amxavx512intrin.h b/lib/include/amxavx512intrin.h index bbde44fc26..18ef721cd1 100644 --- a/lib/include/amxavx512intrin.h +++ b/lib/include/amxavx512intrin.h @@ -16,7 +16,7 @@ #define __DEFAULT_FN_ATTRS_AVX512 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-avx512,avx10.2-512"))) + __target__("amx-avx512,avx10.2"), __min_vector_width__(512))) /// Moves a row from a tile 
register to a zmm destination register, converting /// the int32 source elements to fp32. The row of the tile is selected by a @@ -52,6 +52,40 @@ /// The row of the source tile #define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row) +/// Moves a row from a tile register to a zmm destination register, converting +/// the int32 source elements to fp32. The row of the tile is selected by a +/// 8b immediate value. +/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowd2psi(__tile tsrc, const unsigned int imm8); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := imm8 & 0x3f +/// row_chunk := (imm8 >> 6) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWD2PS instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param imm8 +/// The row of the source tile +#define _tile_cvtrowd2psi(tsrc, imm8) __builtin_ia32_tcvtrowd2psi(tsrc, imm8) + /// Moves a row from a tile register to a zmm destination register, converting /// the fp32 source elements to bf16. It places the resulting bf16 elements /// in the high 16 bits within each dword. The row of the tile is selected @@ -89,6 +123,43 @@ #define _tile_cvtrowps2bf16h(tsrc, row) \ __builtin_ia32_tcvtrowps2bf16h(tsrc, row) +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to bf16. It places the resulting bf16 elements +/// in the high 16 bits within each dword. The row of the tile is selected +/// by a 8b immediate value. 
+/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2bf16hi(__tile tsrc, const unsigned int imm8); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := imm8 & 0x3f +/// row_chunk := (imm8 >> 6) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+0] := 0 +/// dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param imm8 +/// The row of the source tile. +#define _tile_cvtrowps2bf16hi(tsrc, imm8) \ + __builtin_ia32_tcvtrowps2bf16hi(tsrc, imm8) + /// Moves a row from a tile register to a zmm destination register, converting /// the fp32 source elements to bf16. It places the resulting bf16 elements /// in the low 16 bits within each dword. The row of the tile is selected @@ -126,6 +197,43 @@ #define _tile_cvtrowps2bf16l(tsrc, row) \ __builtin_ia32_tcvtrowps2bf16l(tsrc, row) +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to bf16. It places the resulting bf16 elements +/// in the low 16 bits within each dword. The row of the tile is selected +/// by a 8b immediate value.
+/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2bf16li(__tile tsrc, const unsigned int imm8); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := imm8 & 0x3f +/// row_chunk := (imm8 >> 6) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+1] := 0 +/// dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param imm8 +/// The row of the source tile. +#define _tile_cvtrowps2bf16li(tsrc, imm8) \ + __builtin_ia32_tcvtrowps2bf16li(tsrc, imm8) + /// Moves a row from a tile register to a zmm destination register, converting /// the fp32 source elements to fp16. It places the resulting fp16 elements /// in the high 16 bits within each dword. The row of the tile is selected @@ -162,6 +270,43 @@ /// The the row of the source tile. #define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row) +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to fp16. It places the resulting fp16 elements +/// in the high 16 bits within each dword. The row of the tile is selected +/// by a 8b immediate value.
+/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2phhi(__tile tsrc, const unsigned int imm8); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := imm8 & 0x3f +/// row_chunk := (imm8 >> 6) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+0] := 0 +/// dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param imm8 +/// The row of the source tile. +#define _tile_cvtrowps2phhi(tsrc, imm8) \ + __builtin_ia32_tcvtrowps2phhi(tsrc, imm8) + /// Moves a row from a tile register to a zmm destination register, converting /// the fp32 source elements to fp16. It places the resulting fp16 elements /// in the low 16 bits within each dword. The row of the tile is selected @@ -198,6 +343,43 @@ /// The the row of the source tile. #define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row) +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to fp16. It places the resulting fp16 elements +/// in the low 16 bits within each dword. The row of the tile is selected +/// by a 8b immediate value.
+/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2phli(__tile tsrc, const unsigned int imm8); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := imm8 & 0x3f +/// row_chunk := (imm8 >> 6) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+1] := 0 +/// dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param imm8 +/// The row of the source tile. +#define _tile_cvtrowps2phli(tsrc, imm8) \ + __builtin_ia32_tcvtrowps2phli(tsrc, imm8) + /// Move one row of a tile data to a v16f32 data. /// The row of the tile is selected by a 32b GPR. /// @@ -230,6 +412,38 @@ /// \endcode #define _tile_movrow(a, b) ((__m512i)__builtin_ia32_tilemovrow(a, b)) +/// Move one row of a tile data to a v16f32 data. +/// The row of the tile is selected by a 8b immediate value. +/// +/// \headerfile +/// +/// \code +/// __m512i _tile_movrowi(__tile a, const unsigned b); +/// \endcode +/// +/// This intrinsic corresponds to the TILEMOVROW instruction. +/// +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 8b immediate selecting the row (bits 5:0) and row chunk (bits 7:6). +/// \returns +/// The destination v16f32 data. Size is 64 Bytes. +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL>>3 +/// row_index := b&0x3f +/// row_chunk := (b>>6) * VL_bytes +/// FOR i := 0 TO (VL_bytes-1) +/// IF (row_chunk + i >= a.colsb) +/// dst.byte[i] := 0 +/// ELSE +/// dst.byte[i] := a.row[row_index].byte[row_chunk+i] +/// ENDFOR +/// \endcode +#define _tile_movrowi(a, b) ((__m512i)__builtin_ia32_tilemovrowi(a, b)) + /// This is internal intrinsic.
C/C++ user should avoid calling it directly. static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal( diff --git a/lib/include/amxbf16transposeintrin.h b/lib/include/amxbf16transposeintrin.h deleted file mode 100644 index 86f09f2ad8..0000000000 --- a/lib/include/amxbf16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use directly; use instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_BF16TRANSPOSEINTRIN_H -#define __AMX_BF16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-bf16,amx-transpose"))) - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -/// tiles \a a and \a b, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in \a dst, and store the -/// 32-bit result back to tile \a dst. 
-/// -/// \headerfile -/// -/// \code -/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO (a.colsb / 4) - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) * -/// FP32(b.row[k].bf16[2*n+0]) -/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) * -/// FP32(b.row[k].bf16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPBF16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -/// tiles src0 and src1, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in "dst", and store the -/// 32-bit result back to tile "dst". -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TTDPBF16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, - src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_BF16TRANSPOSEINTRIN_H */ diff --git a/lib/include/amxcomplextransposeintrin.h b/lib/include/amxcomplextransposeintrin.h deleted file mode 100644 index 11abaf98e9..0000000000 --- a/lib/include/amxcomplextransposeintrin.h +++ /dev/null @@ -1,303 +0,0 @@ -/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use directly; include instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H -#define __AMX_COMPLEXTRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-complex,amx-transpose"))) - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles \a a and \a b is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// Calculates the imaginary part of the result. For each possible combination -/// of (transposed column of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corresponding complex numbers -/// (one from \a a and one from \a b). 
The imaginary part of the \a a element -/// is multiplied with the real part of the corresponding \a b element, and -/// the real part of the \a a element is multiplied with the imaginary part -/// of the corresponding \a b elements. The two accumulated results are -/// added, and then accumulated into the corresponding row and column of -/// \a dst. -/// -/// \headerfile -/// -/// \code -/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO a.rows - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tcmmimfp16ps(dst, a, b) \ - __builtin_ia32_ttcmmimfp16ps((dst), (a), (b)) - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles \a a and \a b is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// Calculates the real part of the result. For each possible combination -/// of (rtransposed colum of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corresponding complex numbers -/// (one from \a a and one from \a b). 
The real part of the \a a element is -/// multiplied with the real part of the corresponding \a b element, and the -/// negated imaginary part of the \a a element is multiplied with the -/// imaginary part of the corresponding \a b elements. The two accumulated -/// results are added, and then accumulated into the corresponding row and -/// column of \a dst. -/// -/// \headerfile -/// -/// \code -/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO a.rows - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) -/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tcmmrlfp16ps(dst, a, b) \ - __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b)) - -/// Perform matrix conjugate transpose and multiplication of two tiles -/// containing complex elements and accumulate the results into a packed -/// single precision tile. Each dword element in input tiles \a a and \a b -/// is interpreted as a complex number with FP16 real part and FP16 imaginary -/// part. -/// Calculates the imaginary part of the result. For each possible combination -/// of (transposed column of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corresponding complex numbers -/// (one from \a a and one from \a b). 
The negated imaginary part of the \a a -/// element is multiplied with the real part of the corresponding \a b -/// element, and the real part of the \a a element is multiplied with the -/// imaginary part of the corresponding \a b elements. The two accumulated -/// results are added, and then accumulated into the corresponding row and -/// column of \a dst. -/// -/// \headerfile -/// -/// \code -/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b); -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO a.rows - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) -/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_conjtcmmimfp16ps(dst, a, b) \ - __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b)) - -/// Perform conjugate transpose of an FP16-pair of complex elements from \a a -/// and writes the result to \a dst. -/// -/// \headerfile -/// -/// \code -/// void _tile_conjtfp16(__tile dst, __tile a); -/// \endcode -/// -/// \code{.operation} -/// FOR i := 0 TO dst.rows - 1 -/// FOR j := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0] -/// tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1] -/// ENDFOR -/// write_row_and_zero(dst, i, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCONJTFP16 instruction. 
-/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The source tile. Max size is 1024 Bytes. -#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a)) - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal( - unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, - _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal( - unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, - _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal( - unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, - _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2); -} - -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) { - return __builtin_ia32_tconjtfp16_internal(m, n, src); -} - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles src0 and src1 is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// This function calculates the imaginary part of the result. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS -static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles src0 and src1 is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// This function calculates the real part of the result. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TTCMMRLFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -/// Perform matrix conjugate transpose and multiplication of two tiles -/// containing complex elements and accumulate the results into a packed -/// single precision tile. Each dword element in input tiles src0 and src1 -/// is interpreted as a complex number with FP16 real part and FP16 imaginary -/// part. -/// This function calculates the imaginary part of the result. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TCONJTCMMIMFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS -static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -/// Perform conjugate transpose of an FP16-pair of complex elements from src and -/// writes the result to dst. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TCONJTFP16 instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src -/// The source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) { - dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif // __x86_64__ -#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H diff --git a/lib/include/amxfp16transposeintrin.h b/lib/include/amxfp16transposeintrin.h deleted file mode 100644 index 191f8c6097..0000000000 --- a/lib/include/amxfp16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use directly; use instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_FP16TRANSPOSEINTRIN_H -#define __AMX_FP16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-fp16,amx-transpose"))) - -/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in -/// tiles \a a and \a b, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in \a dst, and store the -/// 32-bit result back to tile \a dst. -/// -/// \headerfile -/// -/// \code -/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO (a.colsb / 4) - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * -/// FP32(b.row[k].fp16[2*n+0]) -/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * -/// FP32(b.row[k].fp16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in -/// tiles src0 and src1, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in "dst", and store the -/// 32-bit result back to tile "dst". 
-/// -/// \headerfile -/// -/// This intrinsic corresponds to the TTDPFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, - src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_FP16TRANSPOSEINTRIN_H */ diff --git a/lib/include/amxintrin.h b/lib/include/amxintrin.h index a7da10d995..208aa35806 100644 --- a/lib/include/amxintrin.h +++ b/lib/include/amxintrin.h @@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) { /// bytes. Since there is no 2D type in llvm IR, we use vector type to /// represent 2D tile and the fixed size is maximum amx tile register size. typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); -typedef int _tile1024i_1024a - __attribute__((__vector_size__(1024), __aligned__(1024))); /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE diff --git a/lib/include/amxmovrstransposeintrin.h b/lib/include/amxmovrstransposeintrin.h deleted file mode 100644 index 5f48cba949..0000000000 --- a/lib/include/amxmovrstransposeintrin.h +++ /dev/null @@ -1,200 +0,0 @@ -/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use directly; use instead." 
-#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H -#define __AMX_MOVRS_TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-transpose,amx-movrs"))) - -#define _tile_2rpntlvwz0rs(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride) -#define _tile_2rpntlvwz0rst1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride) -#define _tile_2rpntlvwz1rs(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride) -#define _tile_2rpntlvwz1rst1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride) - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - // Use __tile1024i_1024a* to escape the alignment check in - // clang/test/Headers/x86-intrinsics-headers-clean.cpp - __builtin_ia32_t2rpntlvwz0rs_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz0rst1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1rs_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal( - unsigned short row, 
unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1rst1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ0RS instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. 
The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ0T1RS instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. 
-/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ1 instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely become -/// read shared in the near future and the data caching can be optimized. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ1T1RS instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. 
-/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS -static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -#undef __DEFAULT_FN_ATTRS -#endif /* __x86_64__ */ -#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */ diff --git a/lib/include/amxtf32transposeintrin.h b/lib/include/amxtf32transposeintrin.h deleted file mode 100644 index e1b90c1adf..0000000000 --- a/lib/include/amxtf32transposeintrin.h +++ /dev/null @@ -1,105 +0,0 @@ -/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error \ - "Never use directly; include instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_TF32TRANSPOSEINTRIN_H -#define __AMX_TF32TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-tf32,amx-transpose"))) - -/// \code -/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \ -/// constexpr int b); -/// \endcode -/// -/// This intrinsic corresponds to the TTMMULTF32PS instruction. -/// -/// \param srcdst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. 
-/// -/// \code{.operation} -/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) { -/// dword[12:0] := 0 -/// dword[31:13] := x[31:13] -/// return dword -/// } -/// -/// DEFINE silence_snan_fp32(x[31:0]) { -/// IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0) -/// x.fraction[22] := 1 -/// return x -/// } -/// -/// elements_dest:= srcdst.colsb/4 -/// -/// FOR m := 0 TO (srcdst.rows-1) -/// tmp[511:0] := 0 -/// FOR k := 0 TO (a.rows-1) -/// FOR n := 0 TO (elements_dest-1) -/// a1e := silence_snan_fp32(a.row[k].fp32[m]) -/// a2e := silence_snan_fp32(b.row[k].fp32[n]) -/// s1e := zero_lower_mantissa_bits_fp32(a1e) -/// s2e := zero_lower_mantissa_bits_fp32(a2e) -/// tmp.fp32[n] += s1e * s2e -/// ENDFOR -/// ENDFOR -/// -/// FOR n := 0 TO (elements_dest-1) -/// tmp.fp32[n] += srcdst.row[m].fp32[n] -/// ENDFOR -/// write_row_and_zero(srcdst, m, tmp, srcdst.colsb) -/// -/// ENDFOR -/// -/// zero_upper_rows(srcdst, srcdst.rows) -/// zero_tileconfig_start() -/// \endcode -#define _tile_tmmultf32ps(srcdst, a, b) \ - __builtin_ia32_ttmmultf32ps((srcdst), (a), (b)) - -// dst = m x n (srcdest), src1 = k x m, src2 = k x n -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE -_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do -/// Matrix Plus with dst. All the calculation is base on float32 but with the -/// lower 13-bit set to 0. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TTMMULTF32PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS_TF32_TRANSPOSE -static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col, - dst->tile, src0.tile, src1.tile); -} - -#endif // __x86_64__ -#endif // __AMX_TF32TRANSPOSEINTRIN_H diff --git a/lib/include/amxtransposeintrin.h b/lib/include/amxtransposeintrin.h deleted file mode 100644 index b3fa37d766..0000000000 --- a/lib/include/amxtransposeintrin.h +++ /dev/null @@ -1,248 +0,0 @@ -/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * ===-----------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error "Never use directly; use instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_TRANSPOSEINTRIN_H -#define __AMX_TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_TRANSPOSE \ - __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose"))) - -#define _tile_2rpntlvwz0(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0(tdst, base, stride) -#define _tile_2rpntlvwz0t1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride) -#define _tile_2rpntlvwz1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1(tdst, base, stride) -#define _tile_2rpntlvwz1t1(tdst, base, stride) \ - __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride) - -/// Transpose 32-bit elements from \a src and write the result to \a dst. -/// -/// \headerfile -/// -/// \code -/// void _tile_transposed(__tile dst, __tile src); -/// \endcode -/// -/// This intrinsic corresponds to the TTRANSPOSED instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src -/// The source tile. Max size is 1024 Bytes. 
-/// -/// \code{.operation} -/// -/// FOR i := 0 TO (dst.rows-1) -/// tmp[511:0] := 0 -/// FOR j := 0 TO (dst.colsb/4-1) -/// tmp.dword[j] := src.row[j].dword[i] -/// ENDFOR -/// dst.row[i] := tmp -/// ENDFOR -/// -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src) - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - // Use __tile1024i_1024a* to escape the alignment check in - // clang/test/Headers/x86-intrinsics-headers-clean.cpp - __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0, - (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz0t1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0, - (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal( - unsigned short row, unsigned short col0, unsigned short col1, - _tile1024i *dst0, _tile1024i *dst1, const void *base, - __SIZE_TYPE__ stride) { - __builtin_ia32_t2rpntlvwz1t1_internal( - row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, - (__SIZE_TYPE__)(stride)); -} - -// 
This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE -_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) { - return __builtin_ia32_ttransposed_internal(m, n, src); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ0 instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. 
The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ0T1 instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. 
-/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ1 instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Converts a pair of tiles from memory into VNNI format, and places the -/// results in a pair of destinations specified by dst. The pair of tiles -/// in memory is specified via a tsib; the second tile is after the first -/// one, separated by the same stride that separates each row. -/// The tile configuration for the destination tiles indicates the amount -/// of data to read from memory. The instruction will load a number of rows -/// that is equal to twice the number of rows in tmm1. The size of each row -/// is equal to the average width of the destination tiles. If the second -/// tile is configured with zero rows and columns, only the first tile will -/// be written. The last row will be not be read from memory but instead -/// filled with zeros. -/// Provides a hint to the implementation that the data will likely not be -/// reused in the near future and the data caching can be optimized. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the T2RPNTLVWZ1T1 instruction. -/// -/// \param dst0 -/// First tile of destination tile pair. Max size is 1024i*2 Bytes. -/// \param dst1 -/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. 
-/// \param base -/// A pointer to base address. -/// \param stride -/// The stride between the rows' data to be loaded in memory. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1, - const void *base, __SIZE_TYPE__ stride) { - _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, - &dst1->tile, base, stride); -} - -/// Transpose 32-bit elements from src and write the result to dst. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TTRANSPOSED instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src -/// The source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS_TRANSPOSE -static void __tile_transposed(__tile1024i *dst, __tile1024i src) { - dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile); -} - -#endif /* __x86_64__ */ -#endif /* __AMX_TRANSPOSEINTRIN_H */ diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index 5cfa3d023a..622e8f3d6a 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -55,11 +55,37 @@ __chkfeat(uint64_t __features) { /* 7.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { - uint32_t v; - do - v = __builtin_arm_ldrex(__p); - while (__builtin_arm_strex(__x, __p)); - return v; + uint32_t __v; +#if (__ARM_FEATURE_LDREX & 4) || __ARM_ARCH_6M__ || __linux__ + /* + * Using this clang builtin is sensible in most situations. Where + * LDREX and STREX are available, it will compile to a loop using + * them. Otherwise it will compile to a libcall, requiring the + * runtime to provide that library function. + * + * That's unavoidable on Armv6-M, which has no atomic instructions + * at all (not even SWP), so in that situation the user will just + * have to provide an implementation of __atomic_exchange_4 (perhaps + * it would temporarily disable interrupts, and then do a separate + * load and store). 
+ * + * We also use the libcall strategy on pre-Armv7 Linux targets, on + * the theory that Linux's runtime support library _will_ provide a + * suitable libcall, and it's better to use that than the SWP + * instruction because then when the same binary is run on a later + * Linux system the libcall implementation will use LDREX instead. + */ + __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED); +#else + /* + * But for older Arm architectures when the target is not Linux, we + * fall back to using the SWP instruction via inline assembler. ACLE + * is clear that we're allowed to do this, but shouldn't do it if we + * have a better alternative. + */ + __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory"); +#endif + return __v; } /* 7.6 Memory prefetch intrinsics */ @@ -72,6 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) { #else #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) +#define __pldx_range(access_kind, retention_policy, length, count, stride, \ + reuse_distance, addr) \ + __builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length, \ + count, stride, reuse_distance) +#define __pld_range(access_kind, retention_policy, metadata, addr) \ + __builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata) #endif /* 7.6.2 Instruction prefetch */ @@ -795,28 +827,6 @@ __arm_st64bv0(void *__addr, data512_t __value) { #endif // __ARM_FEATURE_COPROC -/* 17 Transactional Memory Extension (TME) Intrinsics */ -#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME - -#define _TMFAILURE_REASON 0x00007fffu -#define _TMFAILURE_RTRY 0x00008000u -#define _TMFAILURE_CNCL 0x00010000u -#define _TMFAILURE_MEM 0x00020000u -#define _TMFAILURE_IMP 0x00040000u -#define _TMFAILURE_ERR 0x00080000u -#define _TMFAILURE_SIZE 0x00100000u -#define _TMFAILURE_NEST 0x00200000u -#define _TMFAILURE_DBG 0x00400000u -#define _TMFAILURE_INT 0x00800000u 
-#define _TMFAILURE_TRIVIAL 0x01000000u - -#define __tstart() __builtin_arm_tstart() -#define __tcommit() __builtin_arm_tcommit() -#define __tcancel(__arg) __builtin_arm_tcancel(__arg) -#define __ttest() __builtin_arm_ttest() - -#endif /* __ARM_FEATURE_TME */ - /* 8.7 Armv8.5-A Random number generation intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand"))) diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h index 476158a2cb..392184c61b 100644 --- a/lib/include/arm_neon.h +++ b/lib/include/arm_neon.h @@ -10562,7 +10562,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #else @@ -10570,13 +10570,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t poly8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10585,7 +10585,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = 
__builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #else @@ -10593,13 +10593,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t poly16_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10608,7 +10608,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #else @@ -10616,13 +10616,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t poly8_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly8_t, 
__builtin_neon_vgetq_lane_i8(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10631,7 +10631,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #else @@ -10639,13 +10639,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t poly16_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #endif @@ -27027,7 +27027,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \ __ret; \ }) #else @@ -27036,7 +27036,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 
__lane_reverse_64_8); \ - __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __rev1, __p2)); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __rev1), __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) @@ -27044,7 +27044,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27054,7 +27054,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \ __ret; \ }) #else @@ -27063,7 +27063,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ - __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __rev1, __p2)); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __rev1), __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) @@ -27071,7 +27071,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly16x4_t, 
__builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27081,7 +27081,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \ __ret; \ }) #else @@ -27090,7 +27090,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ - __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __rev1, __p2)); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __rev1), __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) @@ -27098,7 +27098,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27108,7 +27108,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \ __ret; \ }) #else @@ -27117,7 +27117,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16_t __s0 = __p0; \ 
poly16x8_t __s1 = __p1; \ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ - __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __rev1, __p2)); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __rev1), __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) @@ -27125,7 +27125,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \ __ret; \ }) #endif @@ -41141,6 +41141,42 @@ __ai __attribute__((target("neon"))) float32x2_t vfms_f32(float32x2_t __p0, floa #endif #if defined(__aarch64__) +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("f8f16mm,neon"))) float16x8_t vmmlaq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmmlaq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __p0), __p1, __p2, __p3)); + return __ret; +} +#else +__ai __attribute__((target("f8f16mm,neon"))) float16x8_t vmmlaq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmmlaq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); + return __ret; +} 
+#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("f8f32mm,neon"))) float32x4_t vmmlaq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmmlaq_f32_mf8_fpm(__p0, __p1, __p2, __p3)); + return __ret; +} +#else +__ai __attribute__((target("f8f32mm,neon"))) float32x4_t vmmlaq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmmlaq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); + return __ret; +} +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { bfloat16x8_t __ret; @@ -49847,6 +49883,16 @@ __ai __attribute__((target("neon"))) int32_t vcvts_s32_f32(float32_t __p0) { __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvts_s32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) int32_t vcvtd_s32_f64(float64_t __p0) { + int32_t __ret; + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtd_s32_f64(__p0)); + return __ret; +} +__ai __attribute__((target("neon"))) int64_t vcvts_s64_f32(float32_t __p0) { + int64_t __ret; + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvts_s64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) int64_t vcvtd_s64_f64(float64_t __p0) { int64_t __ret; __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtd_s64_f64(__p0)); @@ -49878,6 +49924,16 @@ __ai __attribute__((target("neon"))) uint32_t vcvts_u32_f32(float32_t __p0) { __ret = 
__builtin_bit_cast(uint32_t, __builtin_neon_vcvts_u32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) uint32_t vcvtd_u32_f64(float64_t __p0) { + uint32_t __ret; + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtd_u32_f64(__p0)); + return __ret; +} +__ai __attribute__((target("neon"))) uint64_t vcvts_u64_f32(float32_t __p0) { + uint64_t __ret; + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvts_u64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) uint64_t vcvtd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtd_u64_f64(__p0)); @@ -49909,6 +49965,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtas_s32_f32(float32_t __p0) { __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtas_s32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) int32_t vcvtad_s32_f64(float64_t __p0) { + int32_t __ret; + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtad_s32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -49930,6 +49991,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvta_s64_f64(float64x1_t __p0) { __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvta_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } +__ai __attribute__((target("neon"))) int64_t vcvtas_s64_f32(float32_t __p0) { + int64_t __ret; + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtas_s64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) int64_t vcvtad_s64_f64(float64_t __p0) { int64_t __ret; __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtad_s64_f64(__p0)); @@ -49940,6 +50006,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtas_u32_f32(float32_t __p0) { __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtas_u32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) uint32_t vcvtad_u32_f64(float64_t 
__p0) { + uint32_t __ret; + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtad_u32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -49961,6 +50032,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvta_u64_f64(float64x1_t __p0) __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvta_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } +__ai __attribute__((target("neon"))) uint64_t vcvtas_u64_f32(float32_t __p0) { + uint64_t __ret; + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtas_u64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) uint64_t vcvtad_u64_f64(float64_t __p0) { uint64_t __ret; __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtad_u64_f64(__p0)); @@ -49971,6 +50047,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtms_s32_f32(float32_t __p0) { __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtms_s32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) int32_t vcvtmd_s32_f64(float64_t __p0) { + int32_t __ret; + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtmd_s32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -49992,6 +50073,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvtm_s64_f64(float64x1_t __p0) { __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtm_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } +__ai __attribute__((target("neon"))) int64_t vcvtms_s64_f32(float32_t __p0) { + int64_t __ret; + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtms_s64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) int64_t vcvtmd_s64_f64(float64_t __p0) { int64_t __ret; __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtmd_s64_f64(__p0)); @@ -50002,6 +50088,11 @@ __ai __attribute__((target("neon"))) 
uint32_t vcvtms_u32_f32(float32_t __p0) { __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtms_u32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) uint32_t vcvtmd_u32_f64(float64_t __p0) { + uint32_t __ret; + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtmd_u32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -50023,6 +50114,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvtm_u64_f64(float64x1_t __p0) __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtm_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } +__ai __attribute__((target("neon"))) uint64_t vcvtms_u64_f32(float32_t __p0) { + uint64_t __ret; + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtms_u64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) uint64_t vcvtmd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtmd_u64_f64(__p0)); @@ -50033,6 +50129,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtns_s32_f32(float32_t __p0) { __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtns_s32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) int32_t vcvtnd_s32_f64(float64_t __p0) { + int32_t __ret; + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtnd_s32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -50054,6 +50155,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvtn_s64_f64(float64x1_t __p0) { __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtn_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } +__ai __attribute__((target("neon"))) int64_t vcvtns_s64_f32(float32_t __p0) { + int64_t __ret; + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtns_s64_f32(__p0)); + return __ret; +} __ai 
__attribute__((target("neon"))) int64_t vcvtnd_s64_f64(float64_t __p0) { int64_t __ret; __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtnd_s64_f64(__p0)); @@ -50064,6 +50170,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtns_u32_f32(float32_t __p0) { __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtns_u32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) uint32_t vcvtnd_u32_f64(float64_t __p0) { + uint32_t __ret; + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtnd_u32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -50085,6 +50196,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvtn_u64_f64(float64x1_t __p0) __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtn_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } +__ai __attribute__((target("neon"))) uint64_t vcvtns_u64_f32(float32_t __p0) { + uint64_t __ret; + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtns_u64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) uint64_t vcvtnd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtnd_u64_f64(__p0)); @@ -50095,6 +50211,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtps_s32_f32(float32_t __p0) { __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtps_s32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) int32_t vcvtpd_s32_f64(float64_t __p0) { + int32_t __ret; + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtpd_s32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { int64x2_t __ret; @@ -50116,6 +50237,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvtp_s64_f64(float64x1_t __p0) { __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtp_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return 
__ret; } +__ai __attribute__((target("neon"))) int64_t vcvtps_s64_f32(float32_t __p0) { + int64_t __ret; + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtps_s64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) int64_t vcvtpd_s64_f64(float64_t __p0) { int64_t __ret; __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtpd_s64_f64(__p0)); @@ -50126,6 +50252,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtps_u32_f32(float32_t __p0) { __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtps_u32_f32(__p0)); return __ret; } +__ai __attribute__((target("neon"))) uint32_t vcvtpd_u32_f64(float64_t __p0) { + uint32_t __ret; + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtpd_u32_f64(__p0)); + return __ret; +} #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; @@ -50147,6 +50278,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvtp_u64_f64(float64x1_t __p0) __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtp_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } +__ai __attribute__((target("neon"))) uint64_t vcvtps_u64_f32(float32_t __p0) { + uint64_t __ret; + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtps_u64_f32(__p0)); + return __ret; +} __ai __attribute__((target("neon"))) uint64_t vcvtpd_u64_f64(float64_t __p0) { uint64_t __ret; __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtpd_u64_f64(__p0)); @@ -50255,7 +50391,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #else @@ -50263,7 +50399,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa 
poly8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -50272,7 +50408,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vduph_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #else @@ -50280,7 +50416,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa poly16_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -50506,7 +50642,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #else @@ -50514,7 +50650,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa poly8_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ - __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__rev0, __p1)); \ + __ret = 
__builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -50523,7 +50659,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #else @@ -50531,7 +50667,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa poly16_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ - __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -52105,14 +52241,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) #define vget_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x1_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vget_lane_i64(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vget_lane_i64(__builtin_bit_cast(int64x1_t, __s0), __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ __ret; \ }) #else @@ -52120,13 +52256,13 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) poly64_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 
__lane_reverse_128_64); \ - __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__rev0, __p1)); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__s0, __p1)); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ __ret; \ }) #endif @@ -59743,20 +59879,20 @@ __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t _ }) #ifdef __LITTLE_ENDIAN__ #define vqrshrun_high_n_s32(__p0_716, __p1_716, __p2_716) __extension__ ({ \ - int16x8_t __ret_716; \ - int16x4_t __s0_716 = __p0_716; \ + uint16x8_t __ret_716; \ + uint16x4_t __s0_716 = __p0_716; \ int32x4_t __s1_716 = __p1_716; \ - __ret_716 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_716), __builtin_bit_cast(int16x4_t, vqrshrun_n_s32(__s1_716, __p2_716)))); \ + __ret_716 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_716), __builtin_bit_cast(uint16x4_t, vqrshrun_n_s32(__s1_716, __p2_716)))); \ __ret_716; \ }) #else #define vqrshrun_high_n_s32(__p0_717, __p1_717, __p2_717) __extension__ ({ \ - int16x8_t __ret_717; \ - int16x4_t __s0_717 = __p0_717; \ + uint16x8_t __ret_717; \ + uint16x4_t __s0_717 = __p0_717; \ int32x4_t __s1_717 = __p1_717; \ - int16x4_t __rev0_717; __rev0_717 = __builtin_shufflevector(__s0_717, __s0_717, __lane_reverse_64_16); \ + uint16x4_t __rev0_717; __rev0_717 = __builtin_shufflevector(__s0_717, __s0_717, __lane_reverse_64_16); \ int32x4_t __rev1_717; __rev1_717 = __builtin_shufflevector(__s1_717, __s1_717, __lane_reverse_128_32); \ - __ret_717 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_717), __builtin_bit_cast(int16x4_t, 
__noswap_vqrshrun_n_s32(__rev1_717, __p2_717)))); \ + __ret_717 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_717), __builtin_bit_cast(uint16x4_t, __noswap_vqrshrun_n_s32(__rev1_717, __p2_717)))); \ __ret_717 = __builtin_shufflevector(__ret_717, __ret_717, __lane_reverse_128_16); \ __ret_717; \ }) @@ -59764,20 +59900,20 @@ __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t _ #ifdef __LITTLE_ENDIAN__ #define vqrshrun_high_n_s64(__p0_718, __p1_718, __p2_718) __extension__ ({ \ - int32x4_t __ret_718; \ - int32x2_t __s0_718 = __p0_718; \ + uint32x4_t __ret_718; \ + uint32x2_t __s0_718 = __p0_718; \ int64x2_t __s1_718 = __p1_718; \ - __ret_718 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_718), __builtin_bit_cast(int32x2_t, vqrshrun_n_s64(__s1_718, __p2_718)))); \ + __ret_718 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_718), __builtin_bit_cast(uint32x2_t, vqrshrun_n_s64(__s1_718, __p2_718)))); \ __ret_718; \ }) #else #define vqrshrun_high_n_s64(__p0_719, __p1_719, __p2_719) __extension__ ({ \ - int32x4_t __ret_719; \ - int32x2_t __s0_719 = __p0_719; \ + uint32x4_t __ret_719; \ + uint32x2_t __s0_719 = __p0_719; \ int64x2_t __s1_719 = __p1_719; \ - int32x2_t __rev0_719; __rev0_719 = __builtin_shufflevector(__s0_719, __s0_719, __lane_reverse_64_32); \ + uint32x2_t __rev0_719; __rev0_719 = __builtin_shufflevector(__s0_719, __s0_719, __lane_reverse_64_32); \ int64x2_t __rev1_719; __rev1_719 = __builtin_shufflevector(__s1_719, __s1_719, __lane_reverse_128_64); \ - __ret_719 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_719), __builtin_bit_cast(int32x2_t, __noswap_vqrshrun_n_s64(__rev1_719, __p2_719)))); \ + __ret_719 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_719), __builtin_bit_cast(uint32x2_t, __noswap_vqrshrun_n_s64(__rev1_719, 
__p2_719)))); \ __ret_719 = __builtin_shufflevector(__ret_719, __ret_719, __lane_reverse_128_32); \ __ret_719; \ }) @@ -59785,20 +59921,20 @@ __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t _ #ifdef __LITTLE_ENDIAN__ #define vqrshrun_high_n_s16(__p0_720, __p1_720, __p2_720) __extension__ ({ \ - int8x16_t __ret_720; \ - int8x8_t __s0_720 = __p0_720; \ + uint8x16_t __ret_720; \ + uint8x8_t __s0_720 = __p0_720; \ int16x8_t __s1_720 = __p1_720; \ - __ret_720 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_720), __builtin_bit_cast(int8x8_t, vqrshrun_n_s16(__s1_720, __p2_720)))); \ + __ret_720 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_720), __builtin_bit_cast(uint8x8_t, vqrshrun_n_s16(__s1_720, __p2_720)))); \ __ret_720; \ }) #else #define vqrshrun_high_n_s16(__p0_721, __p1_721, __p2_721) __extension__ ({ \ - int8x16_t __ret_721; \ - int8x8_t __s0_721 = __p0_721; \ + uint8x16_t __ret_721; \ + uint8x8_t __s0_721 = __p0_721; \ int16x8_t __s1_721 = __p1_721; \ - int8x8_t __rev0_721; __rev0_721 = __builtin_shufflevector(__s0_721, __s0_721, __lane_reverse_64_8); \ + uint8x8_t __rev0_721; __rev0_721 = __builtin_shufflevector(__s0_721, __s0_721, __lane_reverse_64_8); \ int16x8_t __rev1_721; __rev1_721 = __builtin_shufflevector(__s1_721, __s1_721, __lane_reverse_128_16); \ - __ret_721 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_721), __builtin_bit_cast(int8x8_t, __noswap_vqrshrun_n_s16(__rev1_721, __p2_721)))); \ + __ret_721 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_721), __builtin_bit_cast(uint8x8_t, __noswap_vqrshrun_n_s16(__rev1_721, __p2_721)))); \ __ret_721 = __builtin_shufflevector(__ret_721, __ret_721, __lane_reverse_128_8); \ __ret_721; \ }) @@ -60098,20 +60234,20 @@ __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __ }) #ifdef __LITTLE_ENDIAN__ #define 
vqshrun_high_n_s32(__p0_734, __p1_734, __p2_734) __extension__ ({ \ - int16x8_t __ret_734; \ - int16x4_t __s0_734 = __p0_734; \ + uint16x8_t __ret_734; \ + uint16x4_t __s0_734 = __p0_734; \ int32x4_t __s1_734 = __p1_734; \ - __ret_734 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_734), __builtin_bit_cast(int16x4_t, vqshrun_n_s32(__s1_734, __p2_734)))); \ + __ret_734 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_734), __builtin_bit_cast(uint16x4_t, vqshrun_n_s32(__s1_734, __p2_734)))); \ __ret_734; \ }) #else #define vqshrun_high_n_s32(__p0_735, __p1_735, __p2_735) __extension__ ({ \ - int16x8_t __ret_735; \ - int16x4_t __s0_735 = __p0_735; \ + uint16x8_t __ret_735; \ + uint16x4_t __s0_735 = __p0_735; \ int32x4_t __s1_735 = __p1_735; \ - int16x4_t __rev0_735; __rev0_735 = __builtin_shufflevector(__s0_735, __s0_735, __lane_reverse_64_16); \ + uint16x4_t __rev0_735; __rev0_735 = __builtin_shufflevector(__s0_735, __s0_735, __lane_reverse_64_16); \ int32x4_t __rev1_735; __rev1_735 = __builtin_shufflevector(__s1_735, __s1_735, __lane_reverse_128_32); \ - __ret_735 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_735), __builtin_bit_cast(int16x4_t, __noswap_vqshrun_n_s32(__rev1_735, __p2_735)))); \ + __ret_735 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_735), __builtin_bit_cast(uint16x4_t, __noswap_vqshrun_n_s32(__rev1_735, __p2_735)))); \ __ret_735 = __builtin_shufflevector(__ret_735, __ret_735, __lane_reverse_128_16); \ __ret_735; \ }) @@ -60119,20 +60255,20 @@ __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __ #ifdef __LITTLE_ENDIAN__ #define vqshrun_high_n_s64(__p0_736, __p1_736, __p2_736) __extension__ ({ \ - int32x4_t __ret_736; \ - int32x2_t __s0_736 = __p0_736; \ + uint32x4_t __ret_736; \ + uint32x2_t __s0_736 = __p0_736; \ int64x2_t __s1_736 = __p1_736; \ - __ret_736 = 
__builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_736), __builtin_bit_cast(int32x2_t, vqshrun_n_s64(__s1_736, __p2_736)))); \ + __ret_736 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_736), __builtin_bit_cast(uint32x2_t, vqshrun_n_s64(__s1_736, __p2_736)))); \ __ret_736; \ }) #else #define vqshrun_high_n_s64(__p0_737, __p1_737, __p2_737) __extension__ ({ \ - int32x4_t __ret_737; \ - int32x2_t __s0_737 = __p0_737; \ + uint32x4_t __ret_737; \ + uint32x2_t __s0_737 = __p0_737; \ int64x2_t __s1_737 = __p1_737; \ - int32x2_t __rev0_737; __rev0_737 = __builtin_shufflevector(__s0_737, __s0_737, __lane_reverse_64_32); \ + uint32x2_t __rev0_737; __rev0_737 = __builtin_shufflevector(__s0_737, __s0_737, __lane_reverse_64_32); \ int64x2_t __rev1_737; __rev1_737 = __builtin_shufflevector(__s1_737, __s1_737, __lane_reverse_128_64); \ - __ret_737 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_737), __builtin_bit_cast(int32x2_t, __noswap_vqshrun_n_s64(__rev1_737, __p2_737)))); \ + __ret_737 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_737), __builtin_bit_cast(uint32x2_t, __noswap_vqshrun_n_s64(__rev1_737, __p2_737)))); \ __ret_737 = __builtin_shufflevector(__ret_737, __ret_737, __lane_reverse_128_32); \ __ret_737; \ }) @@ -60140,20 +60276,20 @@ __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __ #ifdef __LITTLE_ENDIAN__ #define vqshrun_high_n_s16(__p0_738, __p1_738, __p2_738) __extension__ ({ \ - int8x16_t __ret_738; \ - int8x8_t __s0_738 = __p0_738; \ + uint8x16_t __ret_738; \ + uint8x8_t __s0_738 = __p0_738; \ int16x8_t __s1_738 = __p1_738; \ - __ret_738 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_738), __builtin_bit_cast(int8x8_t, vqshrun_n_s16(__s1_738, __p2_738)))); \ + __ret_738 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, 
__s0_738), __builtin_bit_cast(uint8x8_t, vqshrun_n_s16(__s1_738, __p2_738)))); \ __ret_738; \ }) #else #define vqshrun_high_n_s16(__p0_739, __p1_739, __p2_739) __extension__ ({ \ - int8x16_t __ret_739; \ - int8x8_t __s0_739 = __p0_739; \ + uint8x16_t __ret_739; \ + uint8x8_t __s0_739 = __p0_739; \ int16x8_t __s1_739 = __p1_739; \ - int8x8_t __rev0_739; __rev0_739 = __builtin_shufflevector(__s0_739, __s0_739, __lane_reverse_64_8); \ + uint8x8_t __rev0_739; __rev0_739 = __builtin_shufflevector(__s0_739, __s0_739, __lane_reverse_64_8); \ int16x8_t __rev1_739; __rev1_739 = __builtin_shufflevector(__s1_739, __s1_739, __lane_reverse_128_16); \ - __ret_739 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_739), __builtin_bit_cast(int8x8_t, __noswap_vqshrun_n_s16(__rev1_739, __p2_739)))); \ + __ret_739 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_739), __builtin_bit_cast(uint8x8_t, __noswap_vqshrun_n_s16(__rev1_739, __p2_739)))); \ __ret_739 = __builtin_shufflevector(__ret_739, __ret_739, __lane_reverse_128_8); \ __ret_739; \ }) @@ -64345,7 +64481,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64x1_t __ret; \ poly64_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vset_lane_i64(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vset_lane_i64(__s0, __builtin_bit_cast(int64x1_t, __s1), __p2)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -64353,7 +64489,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \ __ret; \ }) #else @@ -64362,7 +64498,7 @@ __ai 
__attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ - __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __rev1, __p2)); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __rev1), __p2)); \ __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) @@ -64370,7 +64506,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __s1, __p2)); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \ __ret; \ }) #endif diff --git a/lib/include/arm_sme.h b/lib/include/arm_sme.h index 6da5ca0b51..0983e4a58d 100644 --- a/lib/include/arm_sme.h +++ b/lib/include/arm_sme.h @@ -796,8 +796,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x2 void svdot_za16_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x4_fpm))) void svdot_za16_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x1_fpm))) -void svmla_single_za16_mf8_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x2_fpm))) void svmla_single_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x4_fpm))) @@ -808,6 +806,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_ void 
svmla_lane_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x4_fpm))) void svmla_lane_za16_mf8_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x1_fpm))) +void svmla_za16_mf8_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x2_fpm))) void svmla_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x4_fpm))) @@ -828,8 +828,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x void svdot_za16_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x4_fpm))) void svdot_za16_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x1_fpm))) -void svmla_za16_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x2_fpm))) void svmla_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x4_fpm))) @@ -840,6 +838,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8 void svmla_lane_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x4_fpm))) void svmla_lane_za16_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x1_fpm))) +void svmla_za16_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x2_fpm))) void svmla_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x4_fpm))) @@ -860,8 +860,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x2 void svdot_za32_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x4_fpm))) void svdot_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x1_fpm))) -void svmla_single_za32_mf8_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x2_fpm))) void svmla_single_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x4_fpm))) @@ -872,6 +870,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_ void svmla_lane_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x4_fpm))) void svmla_lane_za32_mf8_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x1_fpm))) +void svmla_za32_mf8_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x2_fpm))) void svmla_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x4_fpm))) @@ -894,8 +894,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x void svdot_za32_vg1x2_fpm(uint32_t, svmfloat8x2_t, 
svmfloat8x2_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x4_fpm))) void svdot_za32_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x1_fpm))) -void svmla_za32_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x2_fpm))) void svmla_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x4_fpm))) @@ -906,6 +904,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8 void svmla_lane_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x4_fpm))) void svmla_lane_za32_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x1_fpm))) +void svmla_za32_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x2_fpm))) void svmla_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x4_fpm))) diff --git a/lib/include/arm_sve.h b/lib/include/arm_sve.h index 6a036be08c..d56bd34530 100644 --- a/lib/include/arm_sve.h +++ b/lib/include/arm_sve.h @@ -4617,6 +4617,86 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x2))) svbfloat16x2_t svminnm(svbfloat16x2_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x4))) svbfloat16x4_t svminnm(svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x2))) +svbfloat16x2_t svmul_single_bf16_x2(svbfloat16x2_t, svbfloat16_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x4))) +svbfloat16x4_t svmul_single_bf16_x4(svbfloat16x4_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x2))) +svbfloat16x2_t svmul_bf16_x2(svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x4))) +svbfloat16x4_t svmul_bf16_x4(svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x2))) +svbfloat16x2_t svscale_single_bf16_x2(svbfloat16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x4))) +svbfloat16x4_t svscale_single_bf16_x4(svbfloat16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x2))) +svbfloat16x2_t svscale_bf16_x2(svbfloat16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x4))) +svbfloat16x4_t svscale_bf16_x4(svbfloat16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x2))) +svbfloat16x2_t svmul(svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x4))) +svbfloat16x4_t svmul(svbfloat16x4_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x2))) +svbfloat16x2_t svmul(svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x4))) +svbfloat16x4_t svmul(svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x2))) +svbfloat16x2_t svscale(svbfloat16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x4))) +svbfloat16x4_t svscale(svbfloat16x4_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x2))) +svbfloat16x2_t svscale(svbfloat16x2_t, svint16x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x4))) +svbfloat16x4_t svscale(svbfloat16x4_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x2))) +svfloat64x2_t svmul_single_f64_x2(svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x2))) +svfloat32x2_t svmul_single_f32_x2(svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x2))) +svfloat16x2_t svmul_single_f16_x2(svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x4))) +svfloat64x4_t svmul_single_f64_x4(svfloat64x4_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x4))) +svfloat32x4_t svmul_single_f32_x4(svfloat32x4_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x4))) +svfloat16x4_t svmul_single_f16_x4(svfloat16x4_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x2))) +svfloat64x2_t svmul_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x2))) +svfloat32x2_t svmul_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x2))) +svfloat16x2_t svmul_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x4))) +svfloat64x4_t svmul_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x4))) +svfloat32x4_t svmul_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x4))) +svfloat16x4_t svmul_f16_x4(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x2))) +svfloat64x2_t svmul(svfloat64x2_t, svfloat64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x2))) +svfloat32x2_t svmul(svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x2))) +svfloat16x2_t svmul(svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x4))) +svfloat64x4_t svmul(svfloat64x4_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x4))) +svfloat32x4_t svmul(svfloat32x4_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x4))) +svfloat16x4_t svmul(svfloat16x4_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x2))) +svfloat64x2_t svmul(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x2))) +svfloat32x2_t svmul(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x2))) +svfloat16x2_t svmul(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x4))) +svfloat64x4_t svmul(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x4))) +svfloat32x4_t svmul(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x4))) +svfloat16x4_t svmul(svfloat16x4_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) float64_t svadda_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) @@ -4655,18 +4735,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32in svuint32_t svadrw_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index))) svuint64_t svadrw_u64base_s64index(svuint64_t, svint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) -svuint32_t svcompact_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) -svuint64_t svcompact_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) -svfloat64_t svcompact_f64(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) -svfloat32_t svcompact_f32(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) -svint32_t svcompact_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) -svint64_t svcompact_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) @@ -5923,18 +5991,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32i svuint32_t svadrw_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index))) svuint64_t svadrw_index(svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) -svuint32_t svcompact(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) -svuint64_t svcompact(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) -svfloat64_t svcompact(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) -svfloat32_t svcompact(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) -svint32_t svcompact(svbool_t, svint32_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) -svint64_t svcompact(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) @@ -8777,3764 +8833,222 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) svmfloat8_t svzipq2(svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) svint16_t svzipq2(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) -svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) -svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) -svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) -svfloat32_t svbfdot_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) -svfloat32_t svbfdot_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) -svfloat32_t svbfmlalb_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) -svfloat32_t svbfmlalb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) -svfloat32_t svbfmlalb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) -svfloat32_t svbfmlalt_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) -svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) -svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) -svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) -svbfloat16_t svcvt_bf16_f32_x(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) -svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) -svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) -svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) -svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) -svfloat32_t svbfdot_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) -svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) -svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) -svfloat32_t svbfmlalb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) -svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) 
-svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) -svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) -svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) -svbfloat16_t svcvt_bf16_x(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) -svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) -svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) -svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) -svfloat32_t svmmla(svfloat32_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) -svuint8_t svld1ro_u8(svbool_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) -svuint32_t svld1ro_u32(svbool_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) -svuint64_t svld1ro_u64(svbool_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) -svuint16_t svld1ro_u16(svbool_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) -svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) -svint8_t svld1ro_s8(svbool_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) -svfloat64_t svld1ro_f64(svbool_t, float64_t const *); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) -svfloat32_t svld1ro_f32(svbool_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) -svfloat16_t svld1ro_f16(svbool_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) -svint32_t svld1ro_s32(svbool_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) -svint64_t svld1ro_s64(svbool_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8))) -svmfloat8_t svld1ro_mf8(svbool_t, mfloat8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) -svint16_t svld1ro_s16(svbool_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) -svfloat64_t svmmla_f64(svfloat64_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) -svuint8_t svtrn1q_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) -svuint32_t svtrn1q_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) -svuint64_t svtrn1q_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) -svuint16_t svtrn1q_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) -svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) -svint8_t svtrn1q_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) -svfloat64_t svtrn1q_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) -svfloat32_t svtrn1q_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) -svfloat16_t 
svtrn1q_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) -svint32_t svtrn1q_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) -svint64_t svtrn1q_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) -svint16_t svtrn1q_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) -svuint8_t svtrn2q_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) -svuint32_t svtrn2q_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) -svuint64_t svtrn2q_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) -svuint16_t svtrn2q_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) -svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) -svint8_t svtrn2q_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) -svfloat64_t svtrn2q_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) -svfloat32_t svtrn2q_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) -svfloat16_t svtrn2q_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) -svint32_t svtrn2q_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) -svint64_t svtrn2q_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) -svint16_t svtrn2q_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) -svuint8_t 
svuzp1q_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) -svuint32_t svuzp1q_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) -svuint64_t svuzp1q_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) -svuint16_t svuzp1q_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) -svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) -svint8_t svuzp1q_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) -svfloat64_t svuzp1q_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) -svfloat32_t svuzp1q_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) -svfloat16_t svuzp1q_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) -svint32_t svuzp1q_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) -svint64_t svuzp1q_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) -svint16_t svuzp1q_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) -svuint8_t svuzp2q_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) -svuint32_t svuzp2q_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) -svuint64_t svuzp2q_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) -svuint16_t svuzp2q_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) -svbfloat16_t 
svuzp2q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) -svint8_t svuzp2q_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) -svfloat64_t svuzp2q_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) -svfloat32_t svuzp2q_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) -svfloat16_t svuzp2q_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) -svint32_t svuzp2q_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) -svint64_t svuzp2q_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) -svint16_t svuzp2q_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) -svuint8_t svzip1q_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) -svuint32_t svzip1q_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) -svuint64_t svzip1q_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) -svuint16_t svzip1q_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) -svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) -svint8_t svzip1q_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) -svfloat64_t svzip1q_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) -svfloat32_t svzip1q_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) -svfloat16_t 
svzip1q_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) -svint32_t svzip1q_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) -svint64_t svzip1q_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) -svint16_t svzip1q_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) -svuint8_t svzip2q_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) -svuint32_t svzip2q_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) -svuint64_t svzip2q_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) -svuint16_t svzip2q_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) -svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) -svint8_t svzip2q_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) -svfloat64_t svzip2q_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) -svfloat32_t svzip2q_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) -svfloat16_t svzip2q_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) -svint32_t svzip2q_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) -svint64_t svzip2q_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) -svint16_t svzip2q_s16(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) -svuint8_t 
svld1ro(svbool_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) -svuint32_t svld1ro(svbool_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) -svuint64_t svld1ro(svbool_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) -svuint16_t svld1ro(svbool_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) -svbfloat16_t svld1ro(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) -svint8_t svld1ro(svbool_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) -svfloat64_t svld1ro(svbool_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) -svfloat32_t svld1ro(svbool_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) -svfloat16_t svld1ro(svbool_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) -svint32_t svld1ro(svbool_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) -svint64_t svld1ro(svbool_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8))) -svmfloat8_t svld1ro(svbool_t, mfloat8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) -svint16_t svld1ro(svbool_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) -svfloat64_t svmmla(svfloat64_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) -svuint8_t svtrn1q(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) -svuint32_t svtrn1q(svuint32_t, svuint32_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) -svuint64_t svtrn1q(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) -svuint16_t svtrn1q(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) -svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) -svint8_t svtrn1q(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) -svfloat64_t svtrn1q(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) -svfloat32_t svtrn1q(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) -svfloat16_t svtrn1q(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) -svint32_t svtrn1q(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) -svint64_t svtrn1q(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) -svint16_t svtrn1q(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) -svuint8_t svtrn2q(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) -svuint32_t svtrn2q(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) -svuint64_t svtrn2q(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) -svuint16_t svtrn2q(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) -svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) -svint8_t svtrn2q(svint8_t, svint8_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) -svfloat64_t svtrn2q(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) -svfloat32_t svtrn2q(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) -svfloat16_t svtrn2q(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) -svint32_t svtrn2q(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) -svint64_t svtrn2q(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) -svint16_t svtrn2q(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) -svuint8_t svuzp1q(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) -svuint32_t svuzp1q(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) -svuint64_t svuzp1q(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) -svuint16_t svuzp1q(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) -svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) -svint8_t svuzp1q(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) -svfloat64_t svuzp1q(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) -svfloat32_t svuzp1q(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) -svfloat16_t svuzp1q(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) -svint32_t svuzp1q(svint32_t, svint32_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) -svint64_t svuzp1q(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) -svint16_t svuzp1q(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) -svuint8_t svuzp2q(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) -svuint32_t svuzp2q(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) -svuint64_t svuzp2q(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) -svuint16_t svuzp2q(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) -svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) -svint8_t svuzp2q(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) -svfloat64_t svuzp2q(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) -svfloat32_t svuzp2q(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) -svfloat16_t svuzp2q(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) -svint32_t svuzp2q(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) -svint64_t svuzp2q(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) -svint16_t svuzp2q(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) -svuint8_t svzip1q(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) -svuint32_t svzip1q(svuint32_t, svuint32_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) -svuint64_t svzip1q(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) -svuint16_t svzip1q(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) -svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) -svint8_t svzip1q(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) -svfloat64_t svzip1q(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) -svfloat32_t svzip1q(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) -svfloat16_t svzip1q(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) -svint32_t svzip1q(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) -svint64_t svzip1q(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) -svint16_t svzip1q(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) -svuint8_t svzip2q(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) -svuint32_t svzip2q(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) -svuint64_t svzip2q(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) -svuint16_t svzip2q(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) -svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) -svint8_t svzip2q(svint8_t, svint8_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) -svfloat64_t svzip2q(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) -svfloat32_t svzip2q(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) -svfloat16_t svzip2q(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) -svint32_t svzip2q(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) -svint64_t svzip2q(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) -svint16_t svzip2q(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) -svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) -svuint32_t svmmla_u32(svuint32_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) -svint32_t svusmmla_s32(svint32_t, svuint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) -svint32_t svmmla(svint32_t, svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) -svuint32_t svmmla(svuint32_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) -svint32_t svusmmla(svint32_t, svuint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) -svint32_t svsudot_n_s32(svint32_t, svint8_t, uint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) -svint32_t svsudot_s32(svint32_t, svint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) -svint32_t svsudot_lane_s32(svint32_t, svint8_t, svuint8_t, uint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) -svint32_t svusdot_n_s32(svint32_t, svuint8_t, int8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) -svint32_t svusdot_s32(svint32_t, svuint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) -svint32_t svusdot_lane_s32(svint32_t, svuint8_t, svint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) -svint32_t svsudot(svint32_t, svint8_t, uint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) -svint32_t svsudot(svint32_t, svint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) -svint32_t svsudot_lane(svint32_t, svint8_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) -svint32_t svusdot(svint32_t, svuint8_t, int8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) -svint32_t svusdot(svint32_t, svuint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) -svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) -svuint64_t svrax1_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) -svint64_t svrax1_s64(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) -svuint64_t svrax1(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) -svint64_t svrax1(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) -svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) -svuint64_t svhistcnt_u64_z(svbool_t, svuint64_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) -svuint32_t svhistcnt_s32_z(svbool_t, svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) -svuint64_t svhistcnt_s64_z(svbool_t, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) -svuint8_t svhistseg_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) -svuint8_t svhistseg_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) -svuint32_t svldnt1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) -svuint64_t svldnt1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) -svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) -svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) -svint32_t svldnt1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) -svint64_t svldnt1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) -svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) -svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) -svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) -svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) -svint32_t svldnt1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) -svint64_t svldnt1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) -svuint32_t svldnt1_gather_u32base_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) -svuint64_t svldnt1_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) -svfloat64_t svldnt1_gather_u64base_f64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) -svfloat32_t svldnt1_gather_u32base_f32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) -svint32_t svldnt1_gather_u32base_s32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) -svint64_t svldnt1_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) -svuint64_t svldnt1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) -svfloat64_t svldnt1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) -svint64_t svldnt1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) -svuint64_t svldnt1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) -svfloat64_t svldnt1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) -svint64_t svldnt1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) -svuint32_t svldnt1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) -svfloat32_t svldnt1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) -svint32_t svldnt1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) -svuint64_t svldnt1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) -svfloat64_t svldnt1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) -svint64_t svldnt1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) -svuint64_t svldnt1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) 
-svfloat64_t svldnt1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) -svint64_t svldnt1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) -svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) -svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) -svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) -svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) -svuint32_t svldnt1sb_gather_u32base_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) -svuint64_t svldnt1sb_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) -svint32_t svldnt1sb_gather_u32base_s32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) -svint64_t svldnt1sb_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) -svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) -svint32_t svldnt1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) -svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) -svint64_t svldnt1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) -svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) -svint64_t svldnt1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) -svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) -svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) -svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) -svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) -svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) -svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) -svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) -svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) -svuint32_t svldnt1sh_gather_u32base_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) -svuint64_t svldnt1sh_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) -svint32_t svldnt1sh_gather_u32base_s32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) -svint64_t svldnt1sh_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) -svuint64_t svldnt1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) -svint64_t svldnt1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) -svuint64_t svldnt1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) -svint64_t svldnt1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) -svuint32_t svldnt1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) -svint32_t svldnt1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) -svuint64_t 
svldnt1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) -svint64_t svldnt1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) -svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) -svint64_t svldnt1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) -svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) -svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) -svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) -svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) -svuint64_t svldnt1sw_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) -svint64_t svldnt1sw_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) -svuint64_t svldnt1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) -svint64_t svldnt1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) -svuint64_t svldnt1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) -svint64_t svldnt1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) -svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) -svint64_t svldnt1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) -svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) -svint64_t svldnt1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) -svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) -svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) -svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) -svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) -svuint32_t svldnt1ub_gather_u32base_u32(svbool_t, svuint32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) -svuint64_t svldnt1ub_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) -svint32_t svldnt1ub_gather_u32base_s32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) -svint64_t svldnt1ub_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) -svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) -svint32_t svldnt1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) -svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) -svint64_t svldnt1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) -svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) -svint64_t svldnt1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) -svuint32_t svldnt1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) -svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) -svint32_t 
svldnt1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) -svint64_t svldnt1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) -svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) -svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) -svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) -svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) -svuint32_t svldnt1uh_gather_u32base_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) -svuint64_t svldnt1uh_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) -svint32_t svldnt1uh_gather_u32base_s32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) -svint64_t svldnt1uh_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) -svuint64_t svldnt1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) -svint64_t svldnt1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) -svuint64_t svldnt1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) -svint64_t svldnt1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) -svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) -svint32_t svldnt1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) -svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) -svint64_t svldnt1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) -svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) -svint64_t svldnt1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) -svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) -svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) -svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) -svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) -svuint64_t svldnt1uw_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) -svint64_t svldnt1uw_gather_u64base_s64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) -svuint64_t svldnt1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) -svint64_t svldnt1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) -svuint64_t svldnt1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) -svint64_t svldnt1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) -svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) -svint64_t svldnt1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) -svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) -svint64_t svldnt1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) -svbool_t 
svmatch_u8(svbool_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) -svbool_t svmatch_u16(svbool_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) -svbool_t svmatch_s8(svbool_t, svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) -svbool_t svmatch_s16(svbool_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) -svbool_t svnmatch_u8(svbool_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) -svbool_t svnmatch_u16(svbool_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) -svbool_t svnmatch_s8(svbool_t, svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) -svbool_t svnmatch_s16(svbool_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) -void svstnt1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) -void svstnt1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) -void svstnt1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) -void svstnt1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) -void svstnt1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) -void 
svstnt1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) -void svstnt1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) -void svstnt1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) -void svstnt1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) -void svstnt1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) -void svstnt1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) -void svstnt1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) -void svstnt1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) -void svstnt1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) -void svstnt1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) -void svstnt1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) -void svstnt1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) -void svstnt1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) -void svstnt1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) -void svstnt1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) -void svstnt1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) -void svstnt1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) -void svstnt1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) -void svstnt1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) -void svstnt1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) -void svstnt1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) -void svstnt1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) -void svstnt1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) -void svstnt1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) -void svstnt1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) -void svstnt1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) -void svstnt1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) -void svstnt1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) -void svstnt1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) -void svstnt1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) -void svstnt1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) -void svstnt1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) -void svstnt1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) -void svstnt1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) -void svstnt1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) -void svstnt1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) -void svstnt1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) -void svstnt1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) -void svstnt1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) -void svstnt1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) -void svstnt1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) -void svstnt1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) -void svstnt1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) -void svstnt1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) -void svstnt1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) -void svstnt1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) -void svstnt1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) -void svstnt1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) -void svstnt1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) -void svstnt1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) -void svstnt1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) -void svstnt1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) -void svstnt1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) -void svstnt1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) -void svstnt1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) -void svstnt1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) -void svstnt1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) -void svstnt1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) -void svstnt1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) -void svstnt1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) -void svstnt1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) -void svstnt1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) -void svstnt1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) -void svstnt1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) -void svstnt1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) -void svstnt1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) -void svstnt1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) -void svstnt1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) -void svstnt1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) -void svstnt1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) -void svstnt1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) -void svstnt1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) -void svstnt1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) -void svstnt1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) -void svstnt1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) -void svstnt1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) -void svstnt1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) -void svstnt1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) 
-svuint32_t svhistcnt_z(svbool_t, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) -svuint64_t svhistcnt_z(svbool_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) -svuint32_t svhistcnt_z(svbool_t, svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) -svuint64_t svhistcnt_z(svbool_t, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) -svuint8_t svhistseg(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) -svuint8_t svhistseg(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) -svuint32_t svldnt1_gather_index_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) -svuint64_t svldnt1_gather_index_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) -svfloat64_t svldnt1_gather_index_f64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) -svfloat32_t svldnt1_gather_index_f32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) -svint32_t svldnt1_gather_index_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) -svint64_t svldnt1_gather_index_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) -svuint32_t svldnt1_gather_offset_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) -svuint64_t 
svldnt1_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) -svfloat64_t svldnt1_gather_offset_f64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) -svfloat32_t svldnt1_gather_offset_f32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) -svint32_t svldnt1_gather_offset_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) -svint64_t svldnt1_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) -svuint32_t svldnt1_gather_u32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) -svuint64_t svldnt1_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) -svfloat64_t svldnt1_gather_f64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) -svfloat32_t svldnt1_gather_f32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) -svint32_t svldnt1_gather_s32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) -svint64_t svldnt1_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) -svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) -svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) -svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) -svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) -svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) -svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) -svuint32_t svldnt1_gather_offset(svbool_t, uint32_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) -svfloat32_t svldnt1_gather_offset(svbool_t, float32_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) -svint32_t svldnt1_gather_offset(svbool_t, int32_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) -svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) -svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) -svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) -svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) -svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, 
svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) -svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) -svuint32_t svldnt1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) -svuint64_t svldnt1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) -svint32_t svldnt1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) -svint64_t svldnt1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) -svuint32_t svldnt1sb_gather_u32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) -svuint64_t svldnt1sb_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) -svint32_t svldnt1sb_gather_s32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) -svint64_t svldnt1sb_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) -svuint32_t svldnt1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) -svint32_t svldnt1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) -svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) -svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) -svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) -svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) -svuint32_t svldnt1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) -svuint64_t svldnt1sh_gather_index_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) -svint32_t svldnt1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) -svint64_t svldnt1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) -svuint32_t svldnt1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) -svuint64_t svldnt1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) -svint32_t svldnt1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) -svint64_t svldnt1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) -svuint32_t 
svldnt1sh_gather_u32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) -svuint64_t svldnt1sh_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) -svint32_t svldnt1sh_gather_s32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) -svint64_t svldnt1sh_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) -svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) -svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) -svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) -svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) -svuint32_t svldnt1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) -svint32_t svldnt1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) -svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) -svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) -svuint64_t 
svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) -svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) -svuint64_t svldnt1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) -svint64_t svldnt1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) -svuint64_t svldnt1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) -svint64_t svldnt1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) -svuint64_t svldnt1sw_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) -svint64_t svldnt1sw_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) -svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) -svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) -svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) -svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) -svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) -svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) -svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) -svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) -svuint32_t svldnt1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) -svuint64_t svldnt1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) -svint32_t svldnt1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) -svint64_t svldnt1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) -svuint32_t svldnt1ub_gather_u32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) -svuint64_t svldnt1ub_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) -svint32_t svldnt1ub_gather_s32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) -svint64_t svldnt1ub_gather_s64(svbool_t, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) -svuint32_t svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) -svint32_t svldnt1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) -svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) -svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) -svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) -svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) -svuint32_t svldnt1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) -svuint64_t svldnt1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) -svint32_t svldnt1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) -svint64_t svldnt1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) -svuint32_t svldnt1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) 
-svuint64_t svldnt1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) -svint32_t svldnt1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) -svint64_t svldnt1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) -svuint32_t svldnt1uh_gather_u32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) -svuint64_t svldnt1uh_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) -svint32_t svldnt1uh_gather_s32(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) -svint64_t svldnt1uh_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) -svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) -svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) -svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) -svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) -svuint32_t svldnt1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) -svint32_t 
svldnt1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) -svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) -svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) -svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) -svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) -svuint64_t svldnt1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) -svint64_t svldnt1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) -svuint64_t svldnt1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) -svint64_t svldnt1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) -svuint64_t svldnt1uw_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) -svint64_t svldnt1uw_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) -svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) -svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) -svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) -svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) -svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) -svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) -svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) -svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) -svbool_t svmatch(svbool_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) -svbool_t svmatch(svbool_t, svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) -svbool_t svmatch(svbool_t, svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) -svbool_t svmatch(svbool_t, svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) -svbool_t svnmatch(svbool_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) -svbool_t svnmatch(svbool_t, svuint16_t, svuint16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) -svbool_t svnmatch(svbool_t, svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) -svbool_t svnmatch(svbool_t, svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) -void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) -void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) -void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) -void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) -void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) -void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) -void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) -void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) -void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) -void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) -void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) -void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) -void svstnt1_scatter(svbool_t, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) -void svstnt1_scatter(svbool_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) -void svstnt1_scatter(svbool_t, svuint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) -void svstnt1_scatter(svbool_t, svuint32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) -void svstnt1_scatter(svbool_t, svuint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) -void svstnt1_scatter(svbool_t, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) -void svstnt1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) -void svstnt1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) -void svstnt1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) -void svstnt1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) -void svstnt1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) -void svstnt1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) -void svstnt1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) -void svstnt1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) -void svstnt1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) -void svstnt1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) -void svstnt1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) -void svstnt1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) -void svstnt1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) -void svstnt1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) -void svstnt1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) -void 
svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) -void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) -void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) -void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) -void svstnt1b_scatter(svbool_t, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) -void svstnt1b_scatter(svbool_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) -void svstnt1b_scatter(svbool_t, svuint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) -void svstnt1b_scatter(svbool_t, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) -void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) -void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) -void svstnt1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) -void svstnt1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) -void 
svstnt1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) -void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) -void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) -void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) -void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) -void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) -void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) -void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) -void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) -void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) -void svstnt1h_scatter(svbool_t, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) -void svstnt1h_scatter(svbool_t, svuint64_t, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) -void svstnt1h_scatter(svbool_t, svuint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) -void svstnt1h_scatter(svbool_t, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) -void svstnt1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) -void svstnt1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) -void svstnt1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) -void svstnt1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) -void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) -void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) -void svstnt1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) -void svstnt1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) -void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) -void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, 
svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) -void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) -void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) -void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) -void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) -void svstnt1w_scatter(svbool_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) -void svstnt1w_scatter(svbool_t, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) -void svstnt1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) -void svstnt1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) -void svstnt1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) -void svstnt1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) -void svstnt1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) -void svstnt1w_scatter_offset(svbool_t, 
uint32_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) -void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) -void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) -svfloat64_t svamax_n_f64_m(svbool_t, svfloat64_t, float64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) -svfloat32_t svamax_n_f32_m(svbool_t, svfloat32_t, float32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m))) -svfloat16_t svamax_n_f16_m(svbool_t, svfloat16_t, float16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x))) -svfloat64_t svamax_n_f64_x(svbool_t, svfloat64_t, float64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x))) -svfloat32_t svamax_n_f32_x(svbool_t, svfloat32_t, float32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x))) -svfloat16_t svamax_n_f16_x(svbool_t, svfloat16_t, float16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z))) -svfloat64_t svamax_n_f64_z(svbool_t, svfloat64_t, float64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z))) -svfloat32_t svamax_n_f32_z(svbool_t, svfloat32_t, float32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z))) -svfloat16_t svamax_n_f16_z(svbool_t, svfloat16_t, float16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m))) -svfloat64_t svamax_f64_m(svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m))) -svfloat32_t svamax_f32_m(svbool_t, svfloat32_t, svfloat32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m))) -svfloat16_t svamax_f16_m(svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x))) -svfloat64_t svamax_f64_x(svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x))) -svfloat32_t svamax_f32_x(svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x))) -svfloat16_t svamax_f16_x(svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z))) -svfloat64_t svamax_f64_z(svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z))) -svfloat32_t svamax_f32_z(svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z))) -svfloat16_t svamax_f16_z(svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m))) -svfloat64_t svamin_n_f64_m(svbool_t, svfloat64_t, float64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m))) -svfloat32_t svamin_n_f32_m(svbool_t, svfloat32_t, float32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m))) -svfloat16_t svamin_n_f16_m(svbool_t, svfloat16_t, float16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x))) -svfloat64_t svamin_n_f64_x(svbool_t, svfloat64_t, float64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x))) -svfloat32_t svamin_n_f32_x(svbool_t, svfloat32_t, float32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x))) -svfloat16_t svamin_n_f16_x(svbool_t, svfloat16_t, float16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z))) -svfloat64_t svamin_n_f64_z(svbool_t, svfloat64_t, float64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z))) -svfloat32_t svamin_n_f32_z(svbool_t, svfloat32_t, float32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z))) -svfloat16_t svamin_n_f16_z(svbool_t, svfloat16_t, float16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m))) -svfloat64_t svamin_f64_m(svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m))) -svfloat32_t svamin_f32_m(svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m))) -svfloat16_t svamin_f16_m(svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x))) -svfloat64_t svamin_f64_x(svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x))) -svfloat32_t svamin_f32_x(svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x))) -svfloat16_t svamin_f16_x(svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z))) -svfloat64_t svamin_f64_z(svbool_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z))) -svfloat32_t svamin_f32_z(svbool_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z))) -svfloat16_t svamin_f16_z(svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) -svfloat64_t svamax_m(svbool_t, svfloat64_t, float64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) -svfloat32_t svamax_m(svbool_t, svfloat32_t, float32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m))) -svfloat16_t svamax_m(svbool_t, svfloat16_t, float16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x))) -svfloat64_t svamax_x(svbool_t, svfloat64_t, float64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x))) -svfloat32_t svamax_x(svbool_t, svfloat32_t, float32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x))) -svfloat16_t svamax_x(svbool_t, svfloat16_t, float16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z))) -svfloat64_t svamax_z(svbool_t, svfloat64_t, float64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z))) -svfloat32_t svamax_z(svbool_t, svfloat32_t, float32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z))) -svfloat16_t svamax_z(svbool_t, svfloat16_t, float16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m))) -svfloat64_t svamax_m(svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m))) -svfloat32_t svamax_m(svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m))) -svfloat16_t svamax_m(svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x))) -svfloat64_t svamax_x(svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x))) -svfloat32_t svamax_x(svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x))) -svfloat16_t svamax_x(svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z))) -svfloat64_t svamax_z(svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z))) -svfloat32_t svamax_z(svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z))) 
-svfloat16_t svamax_z(svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m))) -svfloat64_t svamin_m(svbool_t, svfloat64_t, float64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m))) -svfloat32_t svamin_m(svbool_t, svfloat32_t, float32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m))) -svfloat16_t svamin_m(svbool_t, svfloat16_t, float16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x))) -svfloat64_t svamin_x(svbool_t, svfloat64_t, float64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x))) -svfloat32_t svamin_x(svbool_t, svfloat32_t, float32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x))) -svfloat16_t svamin_x(svbool_t, svfloat16_t, float16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z))) -svfloat64_t svamin_z(svbool_t, svfloat64_t, float64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z))) -svfloat32_t svamin_z(svbool_t, svfloat32_t, float32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z))) -svfloat16_t svamin_z(svbool_t, svfloat16_t, float16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m))) -svfloat64_t svamin_m(svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m))) -svfloat32_t svamin_m(svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m))) -svfloat16_t svamin_m(svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x))) -svfloat64_t svamin_x(svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x))) -svfloat32_t svamin_x(svbool_t, svfloat32_t, svfloat32_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x))) -svfloat16_t svamin_x(svbool_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z))) -svfloat64_t svamin_z(svbool_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z))) -svfloat32_t svamin_z(svbool_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z))) -svfloat16_t svamin_z(svbool_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) -svfloat16_t svdot_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm))) -svfloat16_t svdot_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm))) -svfloat16_t svdot_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) -svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm))) -svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm))) -svfloat16_t svdot_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm))) -svfloat32_t svdot_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm))) -svfloat32_t svdot_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm))) -svfloat32_t svdot_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, 
svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm))) -svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm))) -svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm))) -svfloat32_t svdot_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) -svfloat16_t svmlalb_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm))) -svfloat16_t svmlalb_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm))) -svfloat16_t svmlalb_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm))) -svfloat32_t svmlallbb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm))) -svfloat32_t svmlallbb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm))) -svfloat32_t svmlallbb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm))) -svfloat32_t svmlallbt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm))) -svfloat32_t svmlallbt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm))) -svfloat32_t 
svmlallbt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm))) -svfloat32_t svmlalltb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm))) -svfloat32_t svmlalltb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm))) -svfloat32_t svmlalltb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm))) -svfloat32_t svmlalltt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm))) -svfloat32_t svmlalltt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm))) -svfloat32_t svmlalltt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm))) -svfloat16_t svmlalt_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm))) -svfloat16_t svmlalt_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm))) -svfloat16_t svmlalt_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) -svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm))) -svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm))) -svfloat16_t svmlalb_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm))) -svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm))) -svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm))) -svfloat32_t svmlallbb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm))) -svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm))) -svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm))) -svfloat32_t svmlallbt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm))) -svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm))) -svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm))) -svfloat32_t svmlalltb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm))) -svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm))) -svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, 
fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm))) -svfloat32_t svmlalltt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm))) -svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm))) -svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm))) -svfloat16_t svmlalt_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm))) -svbfloat16_t svcvt1_bf16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm))) -svfloat16_t svcvt1_f16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm))) -svbfloat16_t svcvt2_bf16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm))) -svfloat16_t svcvt2_f16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm))) -svbfloat16_t svcvtlt1_bf16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm))) -svfloat16_t svcvtlt1_f16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm))) -svbfloat16_t svcvtlt2_bf16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm))) -svfloat16_t svcvtlt2_f16_mf8_fpm(svmfloat8_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm))) -svmfloat8_t svcvtn_mf8_bf16_x2_fpm(svbfloat16x2_t, fpm_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm))) -svmfloat8_t svcvtn_mf8_f16_x2_fpm(svfloat16x2_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm))) -svmfloat8_t svcvtnb_mf8_f32_x2_fpm(svfloat32x2_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) -svmfloat8_t svcvtnt_mf8_f32_x2_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm))) -svbfloat16_t svcvt1_bf16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm))) -svfloat16_t svcvt1_f16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm))) -svbfloat16_t svcvt2_bf16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm))) -svfloat16_t svcvt2_f16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm))) -svbfloat16_t svcvtlt1_bf16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm))) -svfloat16_t svcvtlt1_f16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm))) -svbfloat16_t svcvtlt2_bf16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm))) -svfloat16_t svcvtlt2_f16_fpm(svmfloat8_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm))) -svmfloat8_t svcvtn_mf8_fpm(svbfloat16x2_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm))) -svmfloat8_t svcvtn_mf8_fpm(svfloat16x2_t, fpm_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm))) -svmfloat8_t svcvtnb_mf8_fpm(svfloat32x2_t, fpm_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) -svmfloat8_t svcvtnt_mf8_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) -svuint8_t svluti2_lane_u8(svuint8_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) -svint8_t svluti2_lane_s8(svint8_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) -svuint16_t svluti2_lane_u16(svuint16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) -svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) -svfloat16_t svluti2_lane_f16(svfloat16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) -svint16_t svluti2_lane_s16(svint16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8))) -svuint8_t svluti4_lane_u8(svuint8_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) -svint8_t svluti4_lane_s8(svint8_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) -svuint16_t svluti4_lane_u16(svuint16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) -svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) -svfloat16_t svluti4_lane_f16(svfloat16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) -svint16_t svluti4_lane_s16(svint16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) -svuint16_t svluti4_lane_u16_x2(svuint16x2_t, 
svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) -svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) -svfloat16_t svluti4_lane_f16_x2(svfloat16x2_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) -svint16_t svluti4_lane_s16_x2(svint16x2_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) -svuint8_t svluti2_lane(svuint8_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) -svint8_t svluti2_lane(svint8_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) -svuint16_t svluti2_lane(svuint16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) -svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) -svfloat16_t svluti2_lane(svfloat16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) -svint16_t svluti2_lane(svint16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8))) -svuint8_t svluti4_lane(svuint8_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) -svint8_t svluti4_lane(svint8_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) -svuint16_t svluti4_lane(svuint16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) -svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) -svfloat16_t 
svluti4_lane(svfloat16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) -svint16_t svluti4_lane(svint16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) -svuint16_t svluti4_lane(svuint16x2_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) -svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) -svfloat16_t svluti4_lane(svfloat16x2_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) -svint16_t svluti4_lane(svint16x2_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) -svuint8_t svaesd_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) -svuint8_t svaese_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) -svuint8_t svaesimc_u8(svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) -svuint8_t svaesmc_u8(svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) -svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) -svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) -svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) -svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) -svuint8_t svaesd(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) -svuint8_t 
svaese(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) -svuint8_t svaesimc(svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) -svuint8_t svaesmc(svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) -svuint64_t svpmullb_pair(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) -svuint64_t svpmullb_pair(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) -svuint64_t svpmullt_pair(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) -svuint64_t svpmullt_pair(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) -svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) -svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) -svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) -svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) -svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) -svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) -svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) -svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) -svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) -svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) -svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) -svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) -svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) -svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) -svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) -svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) -svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) -svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) -svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) -svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) -svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) -svbfloat16_t 
svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) -svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) -svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) -svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) -svbfloat16_t svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) -svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) -svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) -svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) -svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) -svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) -svbfloat16_t svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) -svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) -svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) -svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, 
svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) -svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) -svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) -svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) -svbfloat16_t svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) -svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) -svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) -svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) -svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) -svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) -svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) -svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) -svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) -svbfloat16_t 
svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) -svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) -svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) -svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) -svbfloat16_t svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) -svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) -svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) -svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) -svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) -svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) -svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) -svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) -svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) -svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) 
-svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) -svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) -svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) -svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) -svbfloat16_t svmax_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) -svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) -svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) -svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) -svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) -svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) -svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) -svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) -svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) -svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) -svbfloat16_t 
svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) -svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) -svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) -svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) -svbfloat16_t svmin_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) -svbfloat16_t svmin_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) -svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) -svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) -svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) -svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) -svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) -svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) -svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) -svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) -svbfloat16_t svmla_m(svbool_t, 
svbfloat16_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) -svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) -svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) -svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) -svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) -svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) -svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) -svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) -svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) -svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) -svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) -svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) -svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) -svbfloat16_t 
svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) -svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) -svbfloat16_t svmul_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) -svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) -svbfloat16_t svmul_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) -svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) -svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) -svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) -svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) -svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) -svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) -svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) -svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) -svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) -svuint8_t svbdep_n_u8(svuint8_t, uint8_t); 
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) -svuint32_t svbdep_n_u32(svuint32_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) -svuint64_t svbdep_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) -svuint16_t svbdep_n_u16(svuint16_t, uint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) -svuint8_t svbdep_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) -svuint32_t svbdep_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) -svuint64_t svbdep_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) -svuint16_t svbdep_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) -svuint8_t svbext_n_u8(svuint8_t, uint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) -svuint32_t svbext_n_u32(svuint32_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) -svuint64_t svbext_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) -svuint16_t svbext_n_u16(svuint16_t, uint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) -svuint8_t svbext_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) -svuint32_t svbext_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) -svuint64_t svbext_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) -svuint16_t svbext_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) -svuint8_t svbgrp_n_u8(svuint8_t, uint8_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) -svuint32_t svbgrp_n_u32(svuint32_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) -svuint64_t svbgrp_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) -svuint16_t svbgrp_n_u16(svuint16_t, uint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) -svuint8_t svbgrp_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) -svuint32_t svbgrp_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) -svuint64_t svbgrp_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) -svuint16_t svbgrp_u16(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) -svuint8_t svbdep(svuint8_t, uint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) -svuint32_t svbdep(svuint32_t, uint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) -svuint64_t svbdep(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) -svuint16_t svbdep(svuint16_t, uint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) -svuint8_t svbdep(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) -svuint32_t svbdep(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) -svuint64_t svbdep(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) -svuint16_t svbdep(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) -svuint8_t svbext(svuint8_t, uint8_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) -svuint32_t svbext(svuint32_t, uint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) -svuint64_t svbext(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) -svuint16_t svbext(svuint16_t, uint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) -svuint8_t svbext(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) -svuint32_t svbext(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) -svuint64_t svbext(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) -svuint16_t svbext(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) -svuint8_t svbgrp(svuint8_t, uint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) -svuint32_t svbgrp(svuint32_t, uint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) -svuint64_t svbgrp(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) -svuint16_t svbgrp(svuint16_t, uint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) -svuint8_t svbgrp(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) -svuint32_t svbgrp(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) -svuint64_t svbgrp(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) -svuint16_t svbgrp(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) -svuint32_t svsm4e_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) -svuint32_t 
svsm4ekey_u32(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) -svuint32_t svsm4e(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) -svuint32_t svsm4ekey(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) -svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) -svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) -svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) -svbfloat16_t svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) -svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) -svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) -svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) -svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) -svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) -svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, 
int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) -svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) -svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) -svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) -svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) -svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) -svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) -svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) -svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) -svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) -svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) -svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8))) -svmfloat8_t svld1q_gather_u64base_offset_mf8(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) -svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) -svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) -svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) -svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) -svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) -svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) -svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) -svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) -svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) -svfloat16_t svld1q_gather_u64base_f16(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) -svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) -svint64_t svld1q_gather_u64base_s64(svbool_t, 
svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8))) -svmfloat8_t svld1q_gather_u64base_mf8(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) -svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) -svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) -svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) -svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) -svbfloat16_t svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) -svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) -svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) -svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) -svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) -svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) -svint16_t 
svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) -svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) -svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) -svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) -svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) -svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) -svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) -svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) -svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) -svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) -svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) -svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8))) -svmfloat8_t svld1q_gather_u64offset_mf8(svbool_t, mfloat8_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) -svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) -svuint64_t svld1udq_u64(svbool_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) -svfloat64_t svld1udq_f64(svbool_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) -svint64_t svld1udq_s64(svbool_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) -svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) -svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) -svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) -svuint32_t svld1uwq_u32(svbool_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) -svfloat32_t svld1uwq_f32(svbool_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) -svint32_t svld1uwq_s32(svbool_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) -svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) -svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) -svint32_t svld1uwq_vnum_s32(svbool_t, 
int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) -void svst1dq_u64(svbool_t, uint64_t *, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) -void svst1dq_f64(svbool_t, float64_t *, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) -void svst1dq_s64(svbool_t, int64_t *, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) -void svst1dq_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) -void svst1dq_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) -void svst1dq_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) -void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) -void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) -void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) -void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) -void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) -void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) -void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) -void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) -void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) -void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) -void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8))) -void svst1q_scatter_u64base_mf8(svbool_t, svuint64_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) -void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) -void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) -void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) -void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) -void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) -void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) -void svst1q_scatter_u64base_index_f32(svbool_t, 
svuint64_t, int64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) -void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) -void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) -void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) -void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) -void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) -void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) -void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) -void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) -void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) -void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) -void svst1q_scatter_u64base_offset_f64(svbool_t, 
svuint64_t, int64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) -void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) -void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) -void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) -void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8))) -void svst1q_scatter_u64base_offset_mf8(svbool_t, svuint64_t, int64_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) -void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) -void svst1q_scatter_s64index_u32(svbool_t, uint32_t *, svint64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64))) -void svst1q_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16))) -void svst1q_scatter_s64index_u16(svbool_t, uint16_t *, svint64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16))) -void svst1q_scatter_s64index_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64))) -void svst1q_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); 
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32))) -void svst1q_scatter_s64index_f32(svbool_t, float32_t *, svint64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16))) -void svst1q_scatter_s64index_f16(svbool_t, float16_t *, svint64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32))) -void svst1q_scatter_s64index_s32(svbool_t, int32_t *, svint64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64))) -void svst1q_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16))) -void svst1q_scatter_s64index_s16(svbool_t, int16_t *, svint64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) -void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) -void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) -void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) -void svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) -void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) -void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) 
-void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) -void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) -void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) -void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8))) -void svst1q_scatter_s64offset_u8(svbool_t, uint8_t *, svint64_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32))) -void svst1q_scatter_s64offset_u32(svbool_t, uint32_t *, svint64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64))) -void svst1q_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16))) -void svst1q_scatter_s64offset_u16(svbool_t, uint16_t *, svint64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16))) -void svst1q_scatter_s64offset_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) -void svst1q_scatter_s64offset_s8(svbool_t, int8_t *, svint64_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64))) -void svst1q_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32))) -void svst1q_scatter_s64offset_f32(svbool_t, float32_t *, svint64_t, svfloat32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16))) -void svst1q_scatter_s64offset_f16(svbool_t, float16_t *, svint64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32))) -void svst1q_scatter_s64offset_s32(svbool_t, int32_t *, svint64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) -void svst1q_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8))) -void svst1q_scatter_s64offset_mf8(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) -void svst1q_scatter_s64offset_s16(svbool_t, int16_t *, svint64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) -void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) -void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) -void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) -void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) -void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) -void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) -void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) -void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) -void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) -void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) -void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8))) -void svst1q_scatter_u64offset_mf8(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) -void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) -void svst1wq_u32(svbool_t, uint32_t *, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) -void svst1wq_f32(svbool_t, float32_t *, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) -void svst1wq_s32(svbool_t, int32_t *, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) -void svst1wq_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) -void svst1wq_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) -void 
svst1wq_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) -svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) -svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) -svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) -svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) -svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) -svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) -svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) -svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) -svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) -svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) -svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) -svuint32_t 
svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) -svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) -svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) -svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) -svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) -svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) -svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) -svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) -svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) -svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8))) -svmfloat8_t svld1q_gather_offset_mf8(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) -svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) -svuint8_t 
svld1q_gather_u8(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) -svuint32_t svld1q_gather_u32(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) -svuint64_t svld1q_gather_u64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) -svuint16_t svld1q_gather_u16(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) -svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) -svint8_t svld1q_gather_s8(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) -svfloat64_t svld1q_gather_f64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) -svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) -svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) -svint32_t svld1q_gather_s32(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) -svint64_t svld1q_gather_s64(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8))) -svmfloat8_t svld1q_gather_mf8(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) -svint16_t svld1q_gather_s16(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) -svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) -svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) -svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) -svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) -svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) -svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) -svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) -svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) -svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) -svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) -svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) -svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) -svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) -svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) -svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) -svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) -svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) -svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) -svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) -svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) -svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8))) -svmfloat8_t svld1q_gather_offset(svbool_t, mfloat8_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) -svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) -svuint64_t svld1udq(svbool_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) -svfloat64_t svld1udq(svbool_t, float64_t const *); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) -svint64_t svld1udq(svbool_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) -svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) -svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) -svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) -svuint32_t svld1uwq(svbool_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) -svfloat32_t svld1uwq(svbool_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) -svint32_t svld1uwq(svbool_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) -svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) -svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) -svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) -void svst1dq(svbool_t, uint64_t *, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) -void svst1dq(svbool_t, float64_t *, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) -void svst1dq(svbool_t, int64_t *, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) -void svst1dq_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) -void 
svst1dq_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) -void svst1dq_vnum(svbool_t, int64_t *, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) -void svst1q_scatter(svbool_t, svuint64_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) -void svst1q_scatter(svbool_t, svuint64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) -void svst1q_scatter(svbool_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) -void svst1q_scatter(svbool_t, svuint64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) -void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) -void svst1q_scatter(svbool_t, svuint64_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) -void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) -void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) -void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) -void svst1q_scatter(svbool_t, svuint64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) -void svst1q_scatter(svbool_t, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8))) -void svst1q_scatter(svbool_t, svuint64_t, svmfloat8_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) -void svst1q_scatter(svbool_t, svuint64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) -void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, 
svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8))) -void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) -void 
svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) -void svst1q_scatter_index(svbool_t, uint32_t *, svint64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64))) -void svst1q_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16))) -void svst1q_scatter_index(svbool_t, uint16_t *, svint64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16))) -void svst1q_scatter_index(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64))) -void svst1q_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32))) -void svst1q_scatter_index(svbool_t, float32_t *, svint64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16))) -void svst1q_scatter_index(svbool_t, float16_t *, svint64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32))) -void svst1q_scatter_index(svbool_t, int32_t *, svint64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64))) -void svst1q_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16))) -void svst1q_scatter_index(svbool_t, int16_t *, svint64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) -void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) -void 
svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) -void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) -void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) -void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) -void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) -void svst1q_scatter_index(svbool_t, float16_t *, svuint64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) -void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) -void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) -void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8))) -void svst1q_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32))) -void svst1q_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64))) -void svst1q_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16))) -void svst1q_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16))) -void svst1q_scatter_offset(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) -void svst1q_scatter_offset(svbool_t, int8_t *, svint64_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64))) -void svst1q_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32))) -void svst1q_scatter_offset(svbool_t, float32_t *, svint64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16))) -void svst1q_scatter_offset(svbool_t, float16_t *, svint64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32))) -void svst1q_scatter_offset(svbool_t, int32_t *, svint64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) -void svst1q_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8))) -void svst1q_scatter_offset(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) -void svst1q_scatter_offset(svbool_t, int16_t *, svint64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) -void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) -void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, 
svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) -void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) -void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) -void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) -void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) -void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) -void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) -void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) -void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) -void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8))) -void svst1q_scatter_offset(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) -void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) -void svst1wq(svbool_t, uint32_t *, svuint32_t); 
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) -void svst1wq(svbool_t, float32_t *, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) -void svst1wq(svbool_t, int32_t *, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) -void svst1wq_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) -void svst1wq_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) -void svst1wq_vnum(svbool_t, int32_t *, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8))) -uint64_t svcntp_c8(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32))) -uint64_t svcntp_c32(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64))) -uint64_t svcntp_c64(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16))) -uint64_t svcntp_c16(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) -svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) -svint8x2_t svld1_s8_x2(svcount_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2))) -svmfloat8x2_t svld1_mf8_x2(svcount_t, mfloat8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) -svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) -svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) -svint64x2_t svld1_s64_x2(svcount_t, int64_t const *); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) -svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) -svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) -svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) -svint16x2_t svld1_s16_x2(svcount_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) -svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) -svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) -svint32x2_t svld1_s32_x2(svcount_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) -svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) -svint8x4_t svld1_s8_x4(svcount_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4))) -svmfloat8x4_t svld1_mf8_x4(svcount_t, mfloat8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) -svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) -svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) -svint64x4_t svld1_s64_x4(svcount_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) -svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) -svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t 
const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) -svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) -svint16x4_t svld1_s16_x4(svcount_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) -svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) -svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) -svint32x4_t svld1_s32_x4(svcount_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) -svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) -svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2))) -svmfloat8x2_t svld1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) -svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) -svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) -svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) -svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) -svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) -svfloat16x2_t 
svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) -svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) -svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) -svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) -svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) -svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) -svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4))) -svmfloat8x4_t svld1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) -svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) -svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) -svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) -svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) -svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) -svfloat16x4_t 
svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) -svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) -svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) -svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) -svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) -svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) -svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2))) -svmfloat8x2_t svldnt1_mf8_x2(svcount_t, mfloat8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) -svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) -svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) -svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) -svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) -svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) -svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) 
-svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) -svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) -svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) -svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) -svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) -svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4))) -svmfloat8x4_t svldnt1_mf8_x4(svcount_t, mfloat8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) -svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) -svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) -svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) -svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) -svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) -svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) -svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) -svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) -svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) -svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) -svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) -svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2))) -svmfloat8x2_t svldnt1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) -svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) -svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) -svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) -svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) -svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) -svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) -svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) -svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); 
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) -svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) -svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) -svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) -svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4))) -svmfloat8x4_t svldnt1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) -svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) -svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) -svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) -svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) -svbfloat16x4_t svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) -svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) -svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) -svuint32x4_t 
svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) -svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) -svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8))) -svbool_t svpext_lane_c8(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32))) -svbool_t svpext_lane_c32(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64))) -svbool_t svpext_lane_c64(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16))) -svbool_t svpext_lane_c16(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2))) -svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2))) -svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2))) -svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2))) -svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c))) -svcount_t svpfalse_c(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16))) -svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32))) -svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64))) -svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8))) -svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8))) -svcount_t svptrue_c8(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32))) -svcount_t svptrue_c32(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64))) -svcount_t svptrue_c64(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16))) -svcount_t svptrue_c16(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) -svbool_t svreinterpret_b(svcount_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) -svcount_t svreinterpret_c(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) -void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) -void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2))) -void svst1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) -void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) -void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) -void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) -void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) -void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) -void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); 
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) -void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) -void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) -void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) -void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) -void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) -void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4))) -void svst1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) -void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) -void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) -void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) -void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) -void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) -void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) -void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) -void svst1_u32_x4(svcount_t, uint32_t *, 
svuint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) -void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) -void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) -void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) -void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2))) -void svst1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) -void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) -void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) -void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) -void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) -void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) -void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) -void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) -void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) -void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) -void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) -void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) -void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4))) -void svst1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) -void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) -void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) -void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) -void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) -void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) -void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) -void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) -void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) -void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) -void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) -void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) -void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2))) -void svstnt1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) -void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) -void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) -void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) -void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) -void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) -void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) -void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) -void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) -void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) -void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) -void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) -void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4))) -void svstnt1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) -void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) -void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) -void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) -void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) -void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) -void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) -void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) -void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) -void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) -void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) -void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) -void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2))) -void svstnt1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) -void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) -void svstnt1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) -void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) -void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) -void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) -void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) -void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) -void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) -void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) -void svstnt1_vnum_s32_x2(svcount_t, 
int32_t *, int64_t, svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) -void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) -void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4))) -void svstnt1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) -void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) -void svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) -void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) -void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) -void svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) -void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) -void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) -void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) -void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) -void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) -svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) -svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) -svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) -svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) -svcount_t svwhilege_c8_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) -svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) -svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) -svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) -svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) -svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) -svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) -svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) -svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) -svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) -svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) -svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) -svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) -svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) -svcount_t svwhilele_c64_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) -svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) -svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) -svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) -svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) -svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) -svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) -svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) -svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) -svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) -svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) -svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) -svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) -svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) -svuint8x2_t svld1_x2(svcount_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) -svint8x2_t svld1_x2(svcount_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2))) -svmfloat8x2_t svld1_x2(svcount_t, mfloat8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) -svuint64x2_t svld1_x2(svcount_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) -svfloat64x2_t svld1_x2(svcount_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) -svint64x2_t svld1_x2(svcount_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) -svuint16x2_t svld1_x2(svcount_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) -svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) -svfloat16x2_t svld1_x2(svcount_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) -svint16x2_t svld1_x2(svcount_t, int16_t const 
*); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) -svuint32x2_t svld1_x2(svcount_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) -svfloat32x2_t svld1_x2(svcount_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) -svint32x2_t svld1_x2(svcount_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) -svuint8x4_t svld1_x4(svcount_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) -svint8x4_t svld1_x4(svcount_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4))) -svmfloat8x4_t svld1_x4(svcount_t, mfloat8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) -svuint64x4_t svld1_x4(svcount_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) -svfloat64x4_t svld1_x4(svcount_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) -svint64x4_t svld1_x4(svcount_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) -svuint16x4_t svld1_x4(svcount_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) -svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) -svfloat16x4_t svld1_x4(svcount_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) -svint16x4_t svld1_x4(svcount_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) -svuint32x4_t svld1_x4(svcount_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) -svfloat32x4_t svld1_x4(svcount_t, float32_t const *); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) -svint32x4_t svld1_x4(svcount_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) -svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) -svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2))) -svmfloat8x2_t svld1_vnum_x2(svcount_t, mfloat8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) -svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) -svfloat64x2_t svld1_vnum_x2(svcount_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) -svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) -svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) -svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) -svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) -svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) -svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) -svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) -svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) -svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) -svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4))) -svmfloat8x4_t svld1_vnum_x4(svcount_t, mfloat8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) -svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) -svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) -svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) -svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) -svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) -svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) -svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) -svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) -svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) -svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) -svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) -svint8x2_t svldnt1_x2(svcount_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2))) -svmfloat8x2_t svldnt1_x2(svcount_t, mfloat8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) -svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) -svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) -svint64x2_t svldnt1_x2(svcount_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) -svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) -svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) -svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) -svint16x2_t svldnt1_x2(svcount_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) -svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) -svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) -svint32x2_t svldnt1_x2(svcount_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) -svuint8x4_t 
svldnt1_x4(svcount_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) -svint8x4_t svldnt1_x4(svcount_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4))) -svmfloat8x4_t svldnt1_x4(svcount_t, mfloat8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) -svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) -svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) -svint64x4_t svldnt1_x4(svcount_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) -svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) -svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) -svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) -svint16x4_t svldnt1_x4(svcount_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) -svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) -svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) -svint32x4_t svldnt1_x4(svcount_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) -svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) -svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2))) -svmfloat8x2_t svldnt1_vnum_x2(svcount_t, mfloat8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) -svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) -svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) -svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) -svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) -svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) -svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) -svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) -svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) -svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) -svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) -svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) -svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t); 
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4))) -svmfloat8x4_t svldnt1_vnum_x4(svcount_t, mfloat8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) -svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) -svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) -svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) -svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) -svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) -svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) -svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) -svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) -svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) -svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) -svbool_t svreinterpret(svcount_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) -svcount_t svreinterpret(svbool_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) -void svst1(svcount_t, uint8_t *, svuint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) -void svst1(svcount_t, int8_t *, svint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2))) -void svst1(svcount_t, mfloat8_t *, svmfloat8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) -void svst1(svcount_t, uint64_t *, svuint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) -void svst1(svcount_t, float64_t *, svfloat64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) -void svst1(svcount_t, int64_t *, svint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) -void svst1(svcount_t, uint16_t *, svuint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) -void svst1(svcount_t, bfloat16_t *, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) -void svst1(svcount_t, float16_t *, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) -void svst1(svcount_t, int16_t *, svint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) -void svst1(svcount_t, uint32_t *, svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) -void svst1(svcount_t, float32_t *, svfloat32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) -void svst1(svcount_t, int32_t *, svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) -void svst1(svcount_t, uint8_t *, svuint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) -void svst1(svcount_t, int8_t *, svint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4))) -void 
svst1(svcount_t, mfloat8_t *, svmfloat8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) -void svst1(svcount_t, uint64_t *, svuint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) -void svst1(svcount_t, float64_t *, svfloat64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) -void svst1(svcount_t, int64_t *, svint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) -void svst1(svcount_t, uint16_t *, svuint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) -void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) -void svst1(svcount_t, float16_t *, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) -void svst1(svcount_t, int16_t *, svint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) -void svst1(svcount_t, uint32_t *, svuint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) -void svst1(svcount_t, float32_t *, svfloat32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) -void svst1(svcount_t, int32_t *, svint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) -void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) -void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2))) -void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) -void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) 
-void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) -void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) -void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) -void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) -void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) -void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) -void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) -void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) -void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) -void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) -void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4))) -void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) -void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) -void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) -void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) -void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) -void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) -void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) -void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) -void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) -void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) -void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) -void svstnt1(svcount_t, uint8_t *, svuint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) -void svstnt1(svcount_t, int8_t *, svint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2))) -void svstnt1(svcount_t, mfloat8_t *, svmfloat8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) -void svstnt1(svcount_t, uint64_t *, svuint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) -void svstnt1(svcount_t, float64_t *, svfloat64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) -void svstnt1(svcount_t, int64_t *, svint64x2_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) -void svstnt1(svcount_t, uint16_t *, svuint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) -void svstnt1(svcount_t, bfloat16_t *, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) -void svstnt1(svcount_t, float16_t *, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) -void svstnt1(svcount_t, int16_t *, svint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) -void svstnt1(svcount_t, uint32_t *, svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) -void svstnt1(svcount_t, float32_t *, svfloat32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) -void svstnt1(svcount_t, int32_t *, svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) -void svstnt1(svcount_t, uint8_t *, svuint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) -void svstnt1(svcount_t, int8_t *, svint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4))) -void svstnt1(svcount_t, mfloat8_t *, svmfloat8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) -void svstnt1(svcount_t, uint64_t *, svuint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) -void svstnt1(svcount_t, float64_t *, svfloat64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) -void svstnt1(svcount_t, int64_t *, svint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) -void svstnt1(svcount_t, uint16_t *, svuint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) -void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) -void svstnt1(svcount_t, float16_t *, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) -void svstnt1(svcount_t, int16_t *, svint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) -void svstnt1(svcount_t, uint32_t *, svuint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) -void svstnt1(svcount_t, float32_t *, svfloat32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) -void svstnt1(svcount_t, int32_t *, svint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) -void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) -void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2))) -void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) -void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) -void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) -void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) -void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) -void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) -void svstnt1_vnum(svcount_t, float16_t *, int64_t, 
svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) -void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) -void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) -void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) -void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) -void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) -void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4))) -void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) -void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) -void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) -void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) -void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) -void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) -void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) -void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) -void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) -void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) -void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) -svcount_t svwhilege_c8(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) -svcount_t svwhilege_c32(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) -svcount_t svwhilege_c64(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) -svcount_t svwhilege_c16(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) -svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) -svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) -svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) -svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) -svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) -svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) -svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) -svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) -svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) -svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) -svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) -svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) -svcount_t svwhilele_c8(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) -svcount_t svwhilele_c32(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) -svcount_t svwhilele_c64(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) -svcount_t svwhilele_c16(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) -svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) -svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) -svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) -svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) -svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) -svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) -svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) -svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) -svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) -svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) -svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) -svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_z))) +svfloat32_t svcvtlt_f32_f16_z(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_z))) +svfloat64_t svcvtlt_f64_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_z))) +svbfloat16_t svcvtnt_bf16_f32_z(svbfloat16_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_z))) +svfloat16_t svcvtnt_f16_f32_z(svfloat16_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_z))) +svfloat32_t svcvtnt_f32_f64_z(svfloat32_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_z))) +svfloat32_t svcvtxnt_f32_f64_z(svfloat32_t, svbool_t, svfloat64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b8))) +int64_t svfirstp_b8(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b32))) +int64_t svfirstp_b32(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b64))) +int64_t svfirstp_b64(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b16))) +int64_t svfirstp_b16(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b8))) +int64_t svlastp_b8(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b32))) +int64_t svlastp_b32(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b64))) +int64_t svlastp_b64(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b16))) +int64_t svlastp_b16(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_m))) +svfloat64_t svrint32x_f64_m(svfloat64_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_m))) +svfloat32_t svrint32x_f32_m(svfloat32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_x))) +svfloat64_t svrint32x_f64_x(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_x))) +svfloat32_t svrint32x_f32_x(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_z))) +svfloat64_t svrint32x_f64_z(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_z))) +svfloat32_t svrint32x_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_m))) +svfloat64_t svrint32z_f64_m(svfloat64_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_m))) 
+svfloat32_t svrint32z_f32_m(svfloat32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_x))) +svfloat64_t svrint32z_f64_x(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_x))) +svfloat32_t svrint32z_f32_x(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_z))) +svfloat64_t svrint32z_f64_z(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_z))) +svfloat32_t svrint32z_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_m))) +svfloat64_t svrint64x_f64_m(svfloat64_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_m))) +svfloat32_t svrint64x_f32_m(svfloat32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_x))) +svfloat64_t svrint64x_f64_x(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_x))) +svfloat32_t svrint64x_f32_x(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_z))) +svfloat64_t svrint64x_f64_z(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_z))) +svfloat32_t svrint64x_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_m))) +svfloat64_t svrint64z_f64_m(svfloat64_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_m))) +svfloat32_t svrint64z_f32_m(svfloat32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_x))) +svfloat64_t svrint64z_f64_x(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_x))) +svfloat32_t svrint64z_f32_x(svbool_t, svfloat32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_z))) +svfloat64_t svrint64z_f64_z(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_z))) +svfloat32_t svrint64z_f32_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_z))) +svfloat32_t svcvtlt_f32_z(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_z))) +svfloat64_t svcvtlt_f64_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_z))) +svbfloat16_t svcvtnt_bf16_z(svbfloat16_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_z))) +svfloat16_t svcvtnt_f16_z(svfloat16_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_z))) +svfloat32_t svcvtnt_f32_z(svfloat32_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_z))) +svfloat32_t svcvtxnt_f32_z(svfloat32_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_m))) +svfloat64_t svrint32x_m(svfloat64_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_m))) +svfloat32_t svrint32x_m(svfloat32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_x))) +svfloat64_t svrint32x_x(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_x))) +svfloat32_t svrint32x_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_z))) +svfloat64_t svrint32x_z(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_z))) +svfloat32_t svrint32x_z(svbool_t, svfloat32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_m))) +svfloat64_t svrint32z_m(svfloat64_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_m))) +svfloat32_t svrint32z_m(svfloat32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_x))) +svfloat64_t svrint32z_x(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_x))) +svfloat32_t svrint32z_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_z))) +svfloat64_t svrint32z_z(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_z))) +svfloat32_t svrint32z_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_m))) +svfloat64_t svrint64x_m(svfloat64_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_m))) +svfloat32_t svrint64x_m(svfloat32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_x))) +svfloat64_t svrint64x_x(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_x))) +svfloat32_t svrint64x_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_z))) +svfloat64_t svrint64x_z(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_z))) +svfloat32_t svrint64x_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_m))) +svfloat64_t svrint64z_m(svfloat64_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_m))) +svfloat32_t svrint64z_m(svfloat32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_x))) +svfloat64_t 
svrint64z_x(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_x))) +svfloat32_t svrint64z_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_z))) +svfloat64_t svrint64z_z(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_z))) +svfloat32_t svrint64z_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u8))) +svuint8_t svcompact_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u16))) +svuint16_t svcompact_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_bf16))) +svbfloat16_t svcompact_bf16(svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s8))) +svint8_t svcompact_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f16))) +svfloat16_t svcompact_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_mf8))) +svmfloat8_t svcompact_mf8(svbool_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s16))) +svint16_t svcompact_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u8))) +svuint8_t svexpand_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u32))) +svuint32_t svexpand_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u64))) +svuint64_t svexpand_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u16))) +svuint16_t svexpand_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_bf16))) +svbfloat16_t svexpand_bf16(svbool_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s8))) +svint8_t svexpand_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f64))) +svfloat64_t svexpand_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f32))) +svfloat32_t svexpand_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f16))) +svfloat16_t svexpand_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s32))) +svint32_t svexpand_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s64))) +svint64_t svexpand_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_mf8))) +svmfloat8_t svexpand_mf8(svbool_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s16))) +svint16_t svexpand_s16(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u8))) +svuint8_t svcompact(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u16))) +svuint16_t svcompact(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_bf16))) +svbfloat16_t svcompact(svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s8))) +svint8_t svcompact(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f16))) +svfloat16_t svcompact(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_mf8))) +svmfloat8_t svcompact(svbool_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s16))) +svint16_t svcompact(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u8))) +svuint8_t svexpand(svbool_t, svuint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u32))) +svuint32_t svexpand(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u64))) +svuint64_t svexpand(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u16))) +svuint16_t svexpand(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_bf16))) +svbfloat16_t svexpand(svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s8))) +svint8_t svexpand(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f64))) +svfloat64_t svexpand(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f32))) +svfloat32_t svexpand(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f16))) +svfloat16_t svexpand(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s32))) +svint32_t svexpand(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s64))) +svint64_t svexpand(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_mf8))) +svmfloat8_t svexpand(svbool_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s16))) +svint16_t svexpand(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8))) svint8_t svaba_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32))) @@ -19059,6 +15573,3848 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s64))) svint64_t svxar(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s16))) svint16_t svxar(svint16_t, svint16_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) +svfloat64_t svamax_n_f64_m(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) +svfloat32_t svamax_n_f32_m(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m))) +svfloat16_t svamax_n_f16_m(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x))) +svfloat64_t svamax_n_f64_x(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x))) +svfloat32_t svamax_n_f32_x(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x))) +svfloat16_t svamax_n_f16_x(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z))) +svfloat64_t svamax_n_f64_z(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z))) +svfloat32_t svamax_n_f32_z(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z))) +svfloat16_t svamax_n_f16_z(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m))) +svfloat64_t svamax_f64_m(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m))) +svfloat32_t svamax_f32_m(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m))) +svfloat16_t svamax_f16_m(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x))) +svfloat64_t svamax_f64_x(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x))) +svfloat32_t svamax_f32_x(svbool_t, svfloat32_t, svfloat32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x))) +svfloat16_t svamax_f16_x(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z))) +svfloat64_t svamax_f64_z(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z))) +svfloat32_t svamax_f32_z(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z))) +svfloat16_t svamax_f16_z(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m))) +svfloat64_t svamin_n_f64_m(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m))) +svfloat32_t svamin_n_f32_m(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m))) +svfloat16_t svamin_n_f16_m(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x))) +svfloat64_t svamin_n_f64_x(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x))) +svfloat32_t svamin_n_f32_x(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x))) +svfloat16_t svamin_n_f16_x(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z))) +svfloat64_t svamin_n_f64_z(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z))) +svfloat32_t svamin_n_f32_z(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z))) +svfloat16_t svamin_n_f16_z(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m))) +svfloat64_t svamin_f64_m(svbool_t, svfloat64_t, svfloat64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m))) +svfloat32_t svamin_f32_m(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m))) +svfloat16_t svamin_f16_m(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x))) +svfloat64_t svamin_f64_x(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x))) +svfloat32_t svamin_f32_x(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x))) +svfloat16_t svamin_f16_x(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z))) +svfloat64_t svamin_f64_z(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z))) +svfloat32_t svamin_f32_z(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z))) +svfloat16_t svamin_f16_z(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) +svfloat64_t svamax_m(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) +svfloat32_t svamax_m(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m))) +svfloat16_t svamax_m(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x))) +svfloat64_t svamax_x(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x))) +svfloat32_t svamax_x(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x))) +svfloat16_t svamax_x(svbool_t, svfloat16_t, float16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z))) +svfloat64_t svamax_z(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z))) +svfloat32_t svamax_z(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z))) +svfloat16_t svamax_z(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m))) +svfloat64_t svamax_m(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m))) +svfloat32_t svamax_m(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m))) +svfloat16_t svamax_m(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x))) +svfloat64_t svamax_x(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x))) +svfloat32_t svamax_x(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x))) +svfloat16_t svamax_x(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z))) +svfloat64_t svamax_z(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z))) +svfloat32_t svamax_z(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z))) +svfloat16_t svamax_z(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m))) +svfloat64_t svamin_m(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m))) +svfloat32_t svamin_m(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m))) 
+svfloat16_t svamin_m(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x))) +svfloat64_t svamin_x(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x))) +svfloat32_t svamin_x(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x))) +svfloat16_t svamin_x(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z))) +svfloat64_t svamin_z(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z))) +svfloat32_t svamin_z(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z))) +svfloat16_t svamin_z(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m))) +svfloat64_t svamin_m(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m))) +svfloat32_t svamin_m(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m))) +svfloat16_t svamin_m(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x))) +svfloat64_t svamin_x(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x))) +svfloat32_t svamin_x(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x))) +svfloat16_t svamin_x(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z))) +svfloat64_t svamin_z(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z))) +svfloat32_t svamin_z(svbool_t, svfloat32_t, svfloat32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z))) +svfloat16_t svamin_z(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm))) +svbfloat16_t svcvt1_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm))) +svfloat16_t svcvt1_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm))) +svbfloat16_t svcvt2_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm))) +svfloat16_t svcvt2_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm))) +svbfloat16_t svcvtlt1_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm))) +svfloat16_t svcvtlt1_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm))) +svbfloat16_t svcvtlt2_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm))) +svfloat16_t svcvtlt2_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm))) +svmfloat8_t svcvtn_mf8_bf16_x2_fpm(svbfloat16x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm))) +svmfloat8_t svcvtn_mf8_f16_x2_fpm(svfloat16x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnb_mf8_f32_x2_fpm(svfloat32x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnt_mf8_f32_x2_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm))) +svbfloat16_t svcvt1_bf16_fpm(svmfloat8_t, fpm_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm))) +svfloat16_t svcvt1_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm))) +svbfloat16_t svcvt2_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm))) +svfloat16_t svcvt2_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm))) +svbfloat16_t svcvtlt1_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm))) +svfloat16_t svcvtlt1_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm))) +svbfloat16_t svcvtlt2_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm))) +svfloat16_t svcvtlt2_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm))) +svmfloat8_t svcvtn_mf8_fpm(svbfloat16x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm))) +svmfloat8_t svcvtn_mf8_fpm(svfloat16x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnb_mf8_fpm(svfloat32x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnt_mf8_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) +svuint8_t svluti2_lane_u8(svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) +svint8_t svluti2_lane_s8(svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) +svuint16_t svluti2_lane_u16(svuint16_t, svuint8_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) +svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) +svfloat16_t svluti2_lane_f16(svfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) +svint16_t svluti2_lane_s16(svint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8))) +svuint8_t svluti4_lane_u8(svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) +svint8_t svluti4_lane_s8(svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) +svuint16_t svluti4_lane_u16(svuint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) +svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) +svfloat16_t svluti4_lane_f16(svfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) +svint16_t svluti4_lane_s16(svint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) +svuint16_t svluti4_lane_u16_x2(svuint16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) +svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) +svfloat16_t svluti4_lane_f16_x2(svfloat16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) +svint16_t svluti4_lane_s16_x2(svint16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) +svuint8_t 
svluti2_lane(svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) +svint8_t svluti2_lane(svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) +svuint16_t svluti2_lane(svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) +svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) +svfloat16_t svluti2_lane(svfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) +svint16_t svluti2_lane(svint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8))) +svuint8_t svluti4_lane(svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) +svint8_t svluti4_lane(svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) +svuint16_t svluti4_lane(svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) +svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) +svfloat16_t svluti4_lane(svfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) +svint16_t svluti4_lane(svint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) +svuint16_t svluti4_lane(svuint16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) +svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) +svfloat16_t 
svluti4_lane(svfloat16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) +svint16_t svluti4_lane(svint16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) +svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) +svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) +svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) +svfloat32_t svbfdot_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) +svfloat32_t svbfdot_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) +svfloat32_t svbfmlalb_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) +svfloat32_t svbfmlalb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) +svfloat32_t svbfmlalb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) +svfloat32_t svbfmlalt_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) +svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) +svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) +svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, 
svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) +svbfloat16_t svcvt_bf16_f32_x(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) +svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) +svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) +svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) +svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) +svfloat32_t svbfdot_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) +svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) +svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) +svfloat32_t svbfmlalb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) +svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) +svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) +svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) +svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) +svbfloat16_t svcvt_bf16_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) +svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) +svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) +svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) +svfloat32_t svmmla(svfloat32_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) +svuint8_t svld1ro_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) +svuint32_t svld1ro_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) +svuint64_t svld1ro_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) +svuint16_t svld1ro_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) +svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) +svint8_t svld1ro_s8(svbool_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) +svfloat64_t svld1ro_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) +svfloat32_t svld1ro_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) +svfloat16_t svld1ro_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) +svint32_t svld1ro_s32(svbool_t, int32_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) +svint64_t svld1ro_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8))) +svmfloat8_t svld1ro_mf8(svbool_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) +svint16_t svld1ro_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) +svfloat64_t svmmla_f64(svfloat64_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) +svuint8_t svtrn1q_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) +svuint32_t svtrn1q_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) +svuint64_t svtrn1q_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) +svuint16_t svtrn1q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) +svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) +svint8_t svtrn1q_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) +svfloat64_t svtrn1q_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) +svfloat32_t svtrn1q_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) +svfloat16_t svtrn1q_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) +svint32_t svtrn1q_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) +svint64_t svtrn1q_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) +svint16_t 
svtrn1q_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) +svuint8_t svtrn2q_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) +svuint32_t svtrn2q_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) +svuint64_t svtrn2q_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) +svuint16_t svtrn2q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) +svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) +svint8_t svtrn2q_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) +svfloat64_t svtrn2q_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) +svfloat32_t svtrn2q_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) +svfloat16_t svtrn2q_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) +svint32_t svtrn2q_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) +svint64_t svtrn2q_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) +svint16_t svtrn2q_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) +svuint8_t svuzp1q_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) +svuint32_t svuzp1q_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) +svuint64_t svuzp1q_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) +svuint16_t 
svuzp1q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) +svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) +svint8_t svuzp1q_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) +svfloat64_t svuzp1q_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) +svfloat32_t svuzp1q_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) +svfloat16_t svuzp1q_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) +svint32_t svuzp1q_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) +svint64_t svuzp1q_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) +svint16_t svuzp1q_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) +svuint8_t svuzp2q_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) +svuint32_t svuzp2q_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) +svuint64_t svuzp2q_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) +svuint16_t svuzp2q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) +svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) +svint8_t svuzp2q_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) +svfloat64_t svuzp2q_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) +svfloat32_t 
svuzp2q_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) +svfloat16_t svuzp2q_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) +svint32_t svuzp2q_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) +svint64_t svuzp2q_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) +svint16_t svuzp2q_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) +svuint8_t svzip1q_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) +svuint32_t svzip1q_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) +svuint64_t svzip1q_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) +svuint16_t svzip1q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) +svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) +svint8_t svzip1q_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) +svfloat64_t svzip1q_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) +svfloat32_t svzip1q_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) +svfloat16_t svzip1q_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) +svint32_t svzip1q_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) +svint64_t svzip1q_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) +svint16_t 
svzip1q_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) +svuint8_t svzip2q_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) +svuint32_t svzip2q_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) +svuint64_t svzip2q_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) +svuint16_t svzip2q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) +svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) +svint8_t svzip2q_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) +svfloat64_t svzip2q_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) +svfloat32_t svzip2q_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) +svfloat16_t svzip2q_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) +svint32_t svzip2q_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) +svint64_t svzip2q_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) +svint16_t svzip2q_s16(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) +svuint8_t svld1ro(svbool_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) +svuint32_t svld1ro(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) +svuint64_t svld1ro(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) +svuint16_t 
svld1ro(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) +svbfloat16_t svld1ro(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) +svint8_t svld1ro(svbool_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) +svfloat64_t svld1ro(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) +svfloat32_t svld1ro(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) +svfloat16_t svld1ro(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) +svint32_t svld1ro(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) +svint64_t svld1ro(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8))) +svmfloat8_t svld1ro(svbool_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) +svint16_t svld1ro(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) +svfloat64_t svmmla(svfloat64_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) +svuint8_t svtrn1q(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) +svuint32_t svtrn1q(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) +svuint64_t svtrn1q(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) +svuint16_t svtrn1q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) +svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) 
+svint8_t svtrn1q(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) +svfloat64_t svtrn1q(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) +svfloat32_t svtrn1q(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) +svfloat16_t svtrn1q(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) +svint32_t svtrn1q(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) +svint64_t svtrn1q(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) +svint16_t svtrn1q(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) +svuint8_t svtrn2q(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) +svuint32_t svtrn2q(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) +svuint64_t svtrn2q(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) +svuint16_t svtrn2q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) +svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) +svint8_t svtrn2q(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) +svfloat64_t svtrn2q(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) +svfloat32_t svtrn2q(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) +svfloat16_t svtrn2q(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) +svint32_t svtrn2q(svint32_t, svint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) +svint64_t svtrn2q(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) +svint16_t svtrn2q(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) +svuint8_t svuzp1q(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) +svuint32_t svuzp1q(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) +svuint64_t svuzp1q(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) +svuint16_t svuzp1q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) +svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) +svint8_t svuzp1q(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) +svfloat64_t svuzp1q(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) +svfloat32_t svuzp1q(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) +svfloat16_t svuzp1q(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) +svint32_t svuzp1q(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) +svint64_t svuzp1q(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) +svint16_t svuzp1q(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) +svuint8_t svuzp2q(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) +svuint32_t svuzp2q(svuint32_t, svuint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) +svuint64_t svuzp2q(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) +svuint16_t svuzp2q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) +svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) +svint8_t svuzp2q(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) +svfloat64_t svuzp2q(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) +svfloat32_t svuzp2q(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) +svfloat16_t svuzp2q(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) +svint32_t svuzp2q(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) +svint64_t svuzp2q(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) +svint16_t svuzp2q(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) +svuint8_t svzip1q(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) +svuint32_t svzip1q(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) +svuint64_t svzip1q(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) +svuint16_t svzip1q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) +svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) +svint8_t svzip1q(svint8_t, svint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) +svfloat64_t svzip1q(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) +svfloat32_t svzip1q(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) +svfloat16_t svzip1q(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) +svint32_t svzip1q(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) +svint64_t svzip1q(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) +svint16_t svzip1q(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) +svuint8_t svzip2q(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) +svuint32_t svzip2q(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) +svuint64_t svzip2q(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) +svuint16_t svzip2q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) +svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) +svint8_t svzip2q(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) +svfloat64_t svzip2q(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) +svfloat32_t svzip2q(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) +svfloat16_t svzip2q(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) +svint32_t svzip2q(svint32_t, svint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) +svint64_t svzip2q(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) +svint16_t svzip2q(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) +svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) +svuint32_t svmmla_u32(svuint32_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) +svint32_t svusmmla_s32(svint32_t, svuint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) +svint32_t svmmla(svint32_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) +svuint32_t svmmla(svuint32_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) +svint32_t svusmmla(svint32_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) +svint32_t svsudot_n_s32(svint32_t, svint8_t, uint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) +svint32_t svsudot_s32(svint32_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) +svint32_t svsudot_lane_s32(svint32_t, svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) +svint32_t svusdot_n_s32(svint32_t, svuint8_t, int8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) +svint32_t svusdot_s32(svint32_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) +svint32_t svusdot_lane_s32(svint32_t, svuint8_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) +svint32_t svsudot(svint32_t, svint8_t, uint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) +svint32_t svsudot(svint32_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) +svint32_t svsudot_lane(svint32_t, svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) +svint32_t svusdot(svint32_t, svuint8_t, int8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) +svint32_t svusdot(svint32_t, svuint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) +svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x2))) +svuint8x2_t svaesd_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x4))) +svuint8x4_t svaesd_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x2))) +svuint8x2_t svaesdimc_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x4))) +svuint8x4_t svaesdimc_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x2))) +svuint8x2_t svaese_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x4))) +svuint8x4_t svaese_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x2))) +svuint8x2_t svaesemc_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x4))) +svuint8x4_t svaesemc_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_n_u64_x2))) +svuint64x2_t 
svpmlal_pair_n_u64_x2(svuint64x2_t, svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_u64_x2))) +svuint64x2_t svpmlal_pair_u64_x2(svuint64x2_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_n_u64_x2))) +svuint64x2_t svpmull_pair_n_u64_x2(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_u64_x2))) +svuint64x2_t svpmull_pair_u64_x2(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x2))) +svuint8x2_t svaesd_lane(svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x4))) +svuint8x4_t svaesd_lane(svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x2))) +svuint8x2_t svaesdimc_lane(svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x4))) +svuint8x4_t svaesdimc_lane(svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x2))) +svuint8x2_t svaese_lane(svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x4))) +svuint8x4_t svaese_lane(svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x2))) +svuint8x2_t svaesemc_lane(svuint8x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x4))) +svuint8x4_t svaesemc_lane(svuint8x4_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_n_u64_x2))) +svuint64x2_t svpmlal_pair(svuint64x2_t, svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_u64_x2))) +svuint64x2_t svpmlal_pair(svuint64x2_t, svuint64_t, svuint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_n_u64_x2))) +svuint64x2_t svpmull_pair(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_u64_x2))) +svuint64x2_t svpmull_pair(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) +svuint8_t svaesd_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) +svuint8_t svaese_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) +svuint8_t svaesimc_u8(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) +svuint8_t svaesmc_u8(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) +svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) +svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) +svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) +svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) +svuint8_t svaesd(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) +svuint8_t svaese(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) +svuint8_t svaesimc(svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) +svuint8_t svaesmc(svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) +svuint64_t svpmullb_pair(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) +svuint64_t svpmullb_pair(svuint64_t, svuint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) +svuint64_t svpmullt_pair(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) +svuint64_t svpmullt_pair(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) +svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) +svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) +svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) +svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) +svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) +svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) +svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) +svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) +svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) +svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) +svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) +svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) +svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) +svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) +svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) +svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) +svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) +svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) +svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) +svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) +svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) +svbfloat16_t svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) +svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) +svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) +svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) +svbfloat16_t 
svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) +svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) +svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) +svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) +svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) +svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) +svbfloat16_t svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) +svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) +svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) +svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) +svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) +svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) +svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) +svbfloat16_t 
svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) +svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) +svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) +svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) +svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) +svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) +svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) +svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) +svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) +svbfloat16_t svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) +svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) +svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) +svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) +svbfloat16_t 
svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) +svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) +svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) +svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) +svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) +svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) +svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) +svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) +svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z))) +svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m))) +svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x))) +svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z))) +svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16))) +svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m))) +svbfloat16_t 
svmax_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x))) +svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z))) +svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m))) +svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x))) +svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z))) +svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m))) +svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x))) +svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z))) +svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m))) +svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x))) +svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z))) +svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m))) +svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x))) +svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z))) +svbfloat16_t svmin_z(svbool_t, 
svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m))) +svbfloat16_t svmin_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x))) +svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z))) +svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m))) +svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x))) +svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z))) +svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m))) +svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x))) +svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z))) +svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m))) +svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x))) +svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z))) +svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m))) +svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x))) 
+svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z))) +svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16))) +svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m))) +svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x))) +svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z))) +svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m))) +svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x))) +svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z))) +svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16))) +svbfloat16_t svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m))) +svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x))) +svbfloat16_t svmul_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z))) +svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m))) +svbfloat16_t svmul_m(svbool_t, 
svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x))) +svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z))) +svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16))) +svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m))) +svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x))) +svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z))) +svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m))) +svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) +svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) +svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_m))) +svbfloat16_t svscale_n_bf16_m(svbool_t, svbfloat16_t, int16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_x))) +svbfloat16_t svscale_n_bf16_x(svbool_t, svbfloat16_t, int16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_z))) +svbfloat16_t svscale_n_bf16_z(svbool_t, svbfloat16_t, int16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_m))) +svbfloat16_t svscale_bf16_m(svbool_t, svbfloat16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x))) +svbfloat16_t svscale_bf16_x(svbool_t, svbfloat16_t, 
svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_z))) +svbfloat16_t svscale_bf16_z(svbool_t, svbfloat16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_m))) +svbfloat16_t svscale_m(svbool_t, svbfloat16_t, int16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_x))) +svbfloat16_t svscale_x(svbool_t, svbfloat16_t, int16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_z))) +svbfloat16_t svscale_z(svbool_t, svbfloat16_t, int16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_m))) +svbfloat16_t svscale_m(svbool_t, svbfloat16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x))) +svbfloat16_t svscale_x(svbool_t, svbfloat16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_z))) +svbfloat16_t svscale_z(svbool_t, svbfloat16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) +svuint8_t svbdep_n_u8(svuint8_t, uint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) +svuint32_t svbdep_n_u32(svuint32_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) +svuint64_t svbdep_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) +svuint16_t svbdep_n_u16(svuint16_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) +svuint8_t svbdep_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) +svuint32_t svbdep_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) +svuint64_t svbdep_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) +svuint16_t svbdep_u16(svuint16_t, svuint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) +svuint8_t svbext_n_u8(svuint8_t, uint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) +svuint32_t svbext_n_u32(svuint32_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) +svuint64_t svbext_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) +svuint16_t svbext_n_u16(svuint16_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) +svuint8_t svbext_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) +svuint32_t svbext_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) +svuint64_t svbext_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) +svuint16_t svbext_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) +svuint8_t svbgrp_n_u8(svuint8_t, uint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) +svuint32_t svbgrp_n_u32(svuint32_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) +svuint64_t svbgrp_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) +svuint16_t svbgrp_n_u16(svuint16_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) +svuint8_t svbgrp_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) +svuint32_t svbgrp_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) +svuint64_t svbgrp_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) +svuint16_t svbgrp_u16(svuint16_t, svuint16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) +svuint8_t svbdep(svuint8_t, uint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) +svuint32_t svbdep(svuint32_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) +svuint64_t svbdep(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) +svuint16_t svbdep(svuint16_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) +svuint8_t svbdep(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) +svuint32_t svbdep(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) +svuint64_t svbdep(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) +svuint16_t svbdep(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) +svuint8_t svbext(svuint8_t, uint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) +svuint32_t svbext(svuint32_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) +svuint64_t svbext(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) +svuint16_t svbext(svuint16_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) +svuint8_t svbext(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) +svuint32_t svbext(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) +svuint64_t svbext(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) +svuint16_t svbext(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) +svuint8_t 
svbgrp(svuint8_t, uint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) +svuint32_t svbgrp(svuint32_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) +svuint64_t svbgrp(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) +svuint16_t svbgrp(svuint16_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) +svuint8_t svbgrp(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) +svuint32_t svbgrp(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) +svuint64_t svbgrp(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) +svuint16_t svbgrp(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_f16))) +svfloat32_t svmmla_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_f16))) +svfloat32_t svmmla(svfloat32_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) +svuint64_t svrax1_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) +svint64_t svrax1_s64(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) +svuint64_t svrax1(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) +svint64_t svrax1(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) +svuint32_t svsm4e_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) +svuint32_t svsm4ekey_u32(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) +svuint32_t svsm4e(svuint32_t, 
svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) +svuint32_t svsm4ekey(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) +svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) +svuint64_t svhistcnt_u64_z(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) +svuint32_t svhistcnt_s32_z(svbool_t, svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) +svuint64_t svhistcnt_s64_z(svbool_t, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) +svuint8_t svhistseg_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) +svuint8_t svhistseg_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) +svuint32_t svldnt1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) +svuint64_t svldnt1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) +svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) +svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) +svint32_t svldnt1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) +svint64_t svldnt1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) +svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) +svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) +svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) +svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) +svint32_t svldnt1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) +svint64_t svldnt1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) +svuint32_t svldnt1_gather_u32base_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) +svuint64_t svldnt1_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) +svfloat64_t svldnt1_gather_u64base_f64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) +svfloat32_t svldnt1_gather_u32base_f32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) +svint32_t svldnt1_gather_u32base_s32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) +svint64_t svldnt1_gather_u64base_s64(svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) +svuint64_t svldnt1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) +svfloat64_t svldnt1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) +svint64_t svldnt1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) +svuint64_t svldnt1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) +svfloat64_t svldnt1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) +svint64_t svldnt1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) +svuint32_t svldnt1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) +svfloat32_t svldnt1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) +svint32_t svldnt1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) +svuint64_t svldnt1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) +svfloat64_t svldnt1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) 
+svint64_t svldnt1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) +svuint64_t svldnt1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) +svfloat64_t svldnt1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) +svint64_t svldnt1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) +svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) +svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) +svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) +svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) +svuint32_t svldnt1sb_gather_u32base_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) +svuint64_t svldnt1sb_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) +svint32_t svldnt1sb_gather_u32base_s32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) +svint64_t svldnt1sb_gather_u64base_s64(svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) +svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) +svint32_t svldnt1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) +svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) +svint64_t svldnt1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) +svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) +svint64_t svldnt1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) +svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) +svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) +svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) +svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) +svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) +svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) +svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) +svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) +svuint32_t svldnt1sh_gather_u32base_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) +svuint64_t svldnt1sh_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) +svint32_t svldnt1sh_gather_u32base_s32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) +svint64_t svldnt1sh_gather_u64base_s64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) +svuint64_t svldnt1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) +svint64_t svldnt1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) +svuint64_t svldnt1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) +svint64_t svldnt1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) +svuint32_t 
svldnt1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) +svint32_t svldnt1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) +svuint64_t svldnt1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) +svint64_t svldnt1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) +svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) +svint64_t svldnt1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) +svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) +svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) +svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) +svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) +svuint64_t svldnt1sw_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) +svint64_t svldnt1sw_gather_u64base_s64(svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) +svuint64_t svldnt1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) +svint64_t svldnt1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) +svuint64_t svldnt1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) +svint64_t svldnt1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) +svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) +svint64_t svldnt1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) +svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) +svint64_t svldnt1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) +svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) +svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) +svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) +svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) +svuint32_t svldnt1ub_gather_u32base_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) +svuint64_t svldnt1ub_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) +svint32_t svldnt1ub_gather_u32base_s32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) +svint64_t svldnt1ub_gather_u64base_s64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) +svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) +svint32_t svldnt1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) +svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) +svint64_t svldnt1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) +svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) +svint64_t svldnt1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) +svuint32_t 
svldnt1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) +svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) +svint32_t svldnt1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) +svint64_t svldnt1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) +svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) +svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) +svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) +svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) +svuint32_t svldnt1uh_gather_u32base_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) +svuint64_t svldnt1uh_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) +svint32_t svldnt1uh_gather_u32base_s32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) +svint64_t svldnt1uh_gather_u64base_s64(svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) +svuint64_t svldnt1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) +svint64_t svldnt1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) +svuint64_t svldnt1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) +svint64_t svldnt1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) +svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) +svint32_t svldnt1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) +svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) +svint64_t svldnt1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) +svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) +svint64_t svldnt1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) +svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) +svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) +svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) +svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) +svuint64_t svldnt1uw_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) +svint64_t svldnt1uw_gather_u64base_s64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) +svuint64_t svldnt1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) +svint64_t svldnt1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) +svuint64_t svldnt1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) +svint64_t svldnt1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) +svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) +svint64_t svldnt1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) 
+svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) +svint64_t svldnt1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) +svbool_t svmatch_u8(svbool_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) +svbool_t svmatch_u16(svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) +svbool_t svmatch_s8(svbool_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) +svbool_t svmatch_s16(svbool_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) +svbool_t svnmatch_u8(svbool_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) +svbool_t svnmatch_u16(svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) +svbool_t svnmatch_s8(svbool_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) +svbool_t svnmatch_s16(svbool_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) +void svstnt1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) +void svstnt1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) +void svstnt1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) +void 
svstnt1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) +void svstnt1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) +void svstnt1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) +void svstnt1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) +void svstnt1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) +void svstnt1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) +void svstnt1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) +void svstnt1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) +void svstnt1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) +void svstnt1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) +void svstnt1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) +void 
svstnt1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) +void svstnt1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) +void svstnt1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) +void svstnt1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) +void svstnt1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) +void svstnt1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) +void svstnt1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) +void svstnt1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) +void svstnt1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) +void svstnt1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) +void svstnt1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) +void svstnt1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) +void svstnt1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) +void svstnt1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) +void svstnt1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) +void svstnt1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) +void svstnt1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) +void svstnt1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) +void svstnt1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) +void svstnt1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) +void svstnt1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) +void svstnt1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) +void svstnt1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) +void svstnt1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) +void svstnt1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) +void svstnt1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) +void svstnt1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) +void svstnt1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) +void svstnt1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) +void svstnt1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) +void svstnt1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) +void svstnt1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) +void svstnt1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) +void svstnt1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) +void 
svstnt1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) +void svstnt1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) +void svstnt1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) +void svstnt1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) +void svstnt1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) +void svstnt1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) +void svstnt1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) +void svstnt1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) +void svstnt1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) +void svstnt1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) +void svstnt1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) +void svstnt1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, 
svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) +void svstnt1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) +void svstnt1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) +void svstnt1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) +void svstnt1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) +void svstnt1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) +void svstnt1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) +void svstnt1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) +void svstnt1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) +void svstnt1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) +void svstnt1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) +void svstnt1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) +void svstnt1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) +void svstnt1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) +void svstnt1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) +void svstnt1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) +void svstnt1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) +void svstnt1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) +void svstnt1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) +void svstnt1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) +void svstnt1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) +void svstnt1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) +void svstnt1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) +void svstnt1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) +svuint32_t svhistcnt_z(svbool_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) +svuint64_t svhistcnt_z(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) +svuint32_t svhistcnt_z(svbool_t, svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) +svuint64_t svhistcnt_z(svbool_t, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) +svuint8_t svhistseg(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) +svuint8_t svhistseg(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) +svuint32_t svldnt1_gather_index_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) +svuint64_t svldnt1_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) +svfloat64_t svldnt1_gather_index_f64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) +svfloat32_t svldnt1_gather_index_f32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) +svint32_t svldnt1_gather_index_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) +svint64_t svldnt1_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) +svuint32_t svldnt1_gather_offset_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) +svuint64_t svldnt1_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) +svfloat64_t svldnt1_gather_offset_f64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) +svfloat32_t svldnt1_gather_offset_f32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) +svint32_t svldnt1_gather_offset_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) +svint64_t svldnt1_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) +svuint32_t svldnt1_gather_u32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) +svuint64_t svldnt1_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) +svfloat64_t svldnt1_gather_f64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) +svfloat32_t svldnt1_gather_f32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) +svint32_t svldnt1_gather_s32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) +svint64_t svldnt1_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) +svuint64_t 
svldnt1_gather_index(svbool_t, uint64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) +svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) +svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) +svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) +svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) +svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) +svuint32_t svldnt1_gather_offset(svbool_t, uint32_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) +svfloat32_t svldnt1_gather_offset(svbool_t, float32_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) +svint32_t svldnt1_gather_offset(svbool_t, int32_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) +svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) +svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) +svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) 
+svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) +svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) +svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) +svuint32_t svldnt1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) +svuint64_t svldnt1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) +svint32_t svldnt1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) +svint64_t svldnt1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) +svuint32_t svldnt1sb_gather_u32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) +svuint64_t svldnt1sb_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) +svint32_t svldnt1sb_gather_s32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) +svint64_t svldnt1sb_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) +svuint32_t svldnt1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) +svint32_t 
svldnt1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) +svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) +svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) +svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) +svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) +svuint32_t svldnt1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) +svuint64_t svldnt1sh_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) +svint32_t svldnt1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) +svint64_t svldnt1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) +svuint32_t svldnt1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) +svuint64_t svldnt1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) +svint32_t svldnt1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) +svint64_t svldnt1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) +svuint32_t svldnt1sh_gather_u32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) +svuint64_t svldnt1sh_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) +svint32_t svldnt1sh_gather_s32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) +svint64_t svldnt1sh_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) +svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) +svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) +svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) +svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) +svuint32_t svldnt1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) +svint32_t svldnt1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) +svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) +svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) +svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) +svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) +svuint64_t svldnt1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) +svint64_t svldnt1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) +svuint64_t svldnt1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) +svint64_t svldnt1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) +svuint64_t svldnt1sw_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) +svint64_t svldnt1sw_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) +svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) +svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) +svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t 
const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) +svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) +svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) +svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) +svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) +svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) +svuint32_t svldnt1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) +svuint64_t svldnt1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) +svint32_t svldnt1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) +svint64_t svldnt1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) +svuint32_t svldnt1ub_gather_u32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) +svuint64_t svldnt1ub_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) +svint32_t 
svldnt1ub_gather_s32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) +svint64_t svldnt1ub_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) +svuint32_t svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) +svint32_t svldnt1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) +svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) +svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) +svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) +svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) +svuint32_t svldnt1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) +svuint64_t svldnt1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) +svint32_t svldnt1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) +svint64_t svldnt1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) +svuint32_t svldnt1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) +svuint64_t svldnt1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) +svint32_t svldnt1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) +svint64_t svldnt1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) +svuint32_t svldnt1uh_gather_u32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) +svuint64_t svldnt1uh_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) +svint32_t svldnt1uh_gather_s32(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) +svint64_t svldnt1uh_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) +svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) +svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) +svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) +svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) +svuint32_t svldnt1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) +svint32_t svldnt1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) +svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) +svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) +svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) +svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) +svuint64_t svldnt1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) +svint64_t svldnt1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) +svuint64_t svldnt1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) +svint64_t svldnt1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) +svuint64_t svldnt1uw_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) +svint64_t 
svldnt1uw_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) +svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) +svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) +svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) +svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) +svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) +svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) +svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) +svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) +svbool_t svmatch(svbool_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) +svbool_t svmatch(svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) +svbool_t svmatch(svbool_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) +svbool_t svmatch(svbool_t, svint16_t, svint16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) +svbool_t svnmatch(svbool_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) +svbool_t svnmatch(svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) +svbool_t svnmatch(svbool_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) +svbool_t svnmatch(svbool_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) +void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) +void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) +void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) +void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) +void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) +void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) +void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) +void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) +void 
svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) +void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) +void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) +void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) +void svstnt1_scatter(svbool_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) +void svstnt1_scatter(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) +void svstnt1_scatter(svbool_t, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) +void svstnt1_scatter(svbool_t, svuint32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) +void svstnt1_scatter(svbool_t, svuint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) +void svstnt1_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) +void svstnt1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) +void svstnt1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) +void svstnt1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) +void svstnt1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) +void svstnt1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) +void svstnt1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) +void svstnt1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) +void svstnt1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) +void svstnt1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) +void svstnt1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) +void svstnt1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) +void svstnt1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) +void svstnt1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) +void svstnt1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) +void 
svstnt1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) +void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) +void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) +void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) +void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) +void svstnt1b_scatter(svbool_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) +void svstnt1b_scatter(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) +void svstnt1b_scatter(svbool_t, svuint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) +void svstnt1b_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) +void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) +void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) +void svstnt1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) +void 
svstnt1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) +void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) +void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) +void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) +void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) +void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) +void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) +void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) +void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) +void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) +void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) +void svstnt1h_scatter(svbool_t, svuint32_t, svuint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) +void svstnt1h_scatter(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) +void svstnt1h_scatter(svbool_t, svuint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) +void svstnt1h_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) +void svstnt1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) +void svstnt1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) +void svstnt1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) +void svstnt1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) +void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) +void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) +void svstnt1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) +void svstnt1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) +void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) +void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) +void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) +void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) +void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) +void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) +void svstnt1w_scatter(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) +void svstnt1w_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) +void svstnt1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) +void svstnt1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) +void svstnt1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) +void svstnt1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) +void svstnt1w_scatter_offset(svbool_t, int32_t *, svint64_t, 
svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) +void svstnt1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) +void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) +void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f16_mf8_fpm))) +svfloat16_t svmmla_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f16_mf8_fpm))) +svfloat16_t svmmla_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_mf8_fpm))) +svfloat32_t svmmla_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_mf8_fpm))) +svfloat32_t svmmla_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) +svfloat16_t svdot_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm))) +svfloat16_t svdot_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm))) +svfloat16_t svdot_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) +svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm))) +svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm))) +svfloat16_t svdot_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm))) +svfloat32_t svdot_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm))) +svfloat32_t svdot_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm))) +svfloat32_t svdot_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm))) +svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm))) +svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm))) +svfloat32_t svdot_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) +svfloat16_t svmlalb_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm))) +svfloat16_t svmlalb_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm))) +svfloat16_t svmlalb_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm))) +svfloat32_t svmlallbb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm))) +svfloat32_t svmlallbb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm))) +svfloat32_t svmlallbb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm))) +svfloat32_t svmlallbt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm))) +svfloat32_t svmlallbt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm))) +svfloat32_t svmlallbt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm))) +svfloat32_t svmlalltb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm))) +svfloat32_t svmlalltb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm))) +svfloat32_t svmlalltb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm))) +svfloat32_t svmlalltt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm))) +svfloat32_t svmlalltt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm))) +svfloat32_t svmlalltt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm))) +svfloat16_t svmlalt_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm))) +svfloat16_t svmlalt_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm))) +svfloat16_t svmlalt_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) +svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm))) +svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm))) +svfloat16_t svmlalb_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm))) +svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm))) +svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm))) +svfloat32_t svmlallbb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm))) +svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm))) +svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm))) +svfloat32_t svmlallbt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm))) +svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, 
fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm))) +svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm))) +svfloat32_t svmlalltb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm))) +svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm))) +svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm))) +svfloat32_t svmlalltt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm))) +svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm))) +svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm))) +svfloat16_t svmlalt_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) +svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) +svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) +svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) +svbfloat16_t 
svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) +svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) +svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) +svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) +svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) +svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) +svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) +svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) +svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) +svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) +svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) +svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) +svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) +svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) +svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) +svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) +svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) +svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8))) +svmfloat8_t svld1q_gather_u64base_offset_mf8(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) +svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) +svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) +svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) +svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) +svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) +svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) +svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) +svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) +svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) +svfloat16_t svld1q_gather_u64base_f16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) +svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) +svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8))) +svmfloat8_t svld1q_gather_u64base_mf8(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) +svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) +svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) +svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) +svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) +svbfloat16_t 
svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) +svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) +svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) +svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) +svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) +svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) +svint16_t svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) +svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) +svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) +svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) +svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) +svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) +svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) +svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) +svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) +svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) +svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) +svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8))) +svmfloat8_t svld1q_gather_u64offset_mf8(svbool_t, mfloat8_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) +svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) +svuint64_t svld1udq_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) +svfloat64_t svld1udq_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) +svint64_t svld1udq_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) +svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) 
+svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) +svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) +svuint32_t svld1uwq_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) +svfloat32_t svld1uwq_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) +svint32_t svld1uwq_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) +svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) +svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) +svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) +void svst1dq_u64(svbool_t, uint64_t *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) +void svst1dq_f64(svbool_t, float64_t *, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) +void svst1dq_s64(svbool_t, int64_t *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) +void svst1dq_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) +void svst1dq_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) +void svst1dq_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) +void 
svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) +void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) +void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) +void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) +void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) +void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) +void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) +void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) +void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) +void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) +void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8))) +void svst1q_scatter_u64base_mf8(svbool_t, svuint64_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) +void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) +void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) +void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) +void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) +void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) +void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) +void svst1q_scatter_u64base_index_f32(svbool_t, svuint64_t, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) +void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) +void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) +void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) +void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) +void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) +void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) +void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) +void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) +void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) +void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) +void svst1q_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) +void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) +void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) +void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) +void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8))) +void svst1q_scatter_u64base_offset_mf8(svbool_t, svuint64_t, int64_t, svmfloat8_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) +void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) +void svst1q_scatter_s64index_u32(svbool_t, uint32_t *, svint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64))) +void svst1q_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16))) +void svst1q_scatter_s64index_u16(svbool_t, uint16_t *, svint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16))) +void svst1q_scatter_s64index_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64))) +void svst1q_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32))) +void svst1q_scatter_s64index_f32(svbool_t, float32_t *, svint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16))) +void svst1q_scatter_s64index_f16(svbool_t, float16_t *, svint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32))) +void svst1q_scatter_s64index_s32(svbool_t, int32_t *, svint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64))) +void svst1q_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16))) +void svst1q_scatter_s64index_s16(svbool_t, int16_t *, svint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) 
+void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) +void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) +void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) +void svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) +void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) +void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) +void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) +void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) +void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) +void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8))) +void svst1q_scatter_s64offset_u8(svbool_t, uint8_t *, svint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32))) +void svst1q_scatter_s64offset_u32(svbool_t, uint32_t *, svint64_t, svuint32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64))) +void svst1q_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16))) +void svst1q_scatter_s64offset_u16(svbool_t, uint16_t *, svint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16))) +void svst1q_scatter_s64offset_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) +void svst1q_scatter_s64offset_s8(svbool_t, int8_t *, svint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64))) +void svst1q_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32))) +void svst1q_scatter_s64offset_f32(svbool_t, float32_t *, svint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16))) +void svst1q_scatter_s64offset_f16(svbool_t, float16_t *, svint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32))) +void svst1q_scatter_s64offset_s32(svbool_t, int32_t *, svint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) +void svst1q_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8))) +void svst1q_scatter_s64offset_mf8(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) +void svst1q_scatter_s64offset_s16(svbool_t, int16_t *, svint64_t, svint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) +void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) +void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) +void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) +void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) +void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) +void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) +void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) +void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) +void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) +void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) +void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8))) +void svst1q_scatter_u64offset_mf8(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) +void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) +void svst1wq_u32(svbool_t, uint32_t *, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) +void svst1wq_f32(svbool_t, float32_t *, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) +void svst1wq_s32(svbool_t, int32_t *, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) +void svst1wq_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) +void svst1wq_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) +void svst1wq_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) +svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) +svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16))) +svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16))) +svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64))) +svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, 
int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32))) +svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16))) +svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32))) +svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64))) +svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16))) +svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) +svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32))) +svuint32_t svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64))) +svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16))) +svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16))) +svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8))) +svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64))) +svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, 
int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32))) +svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16))) +svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32))) +svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) +svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8))) +svmfloat8_t svld1q_gather_offset_mf8(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) +svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) +svuint8_t svld1q_gather_u8(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32))) +svuint32_t svld1q_gather_u32(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64))) +svuint64_t svld1q_gather_u64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16))) +svuint16_t svld1q_gather_u16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16))) +svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8))) +svint8_t svld1q_gather_s8(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64))) +svfloat64_t 
svld1q_gather_f64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32))) +svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16))) +svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32))) +svint32_t svld1q_gather_s32(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) +svint64_t svld1q_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8))) +svmfloat8_t svld1q_gather_mf8(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) +svint16_t svld1q_gather_s16(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) +svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64))) +svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16))) +svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16))) +svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64))) +svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32))) +svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16))) 
+svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32))) +svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64))) +svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16))) +svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) +svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32))) +svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64))) +svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16))) +svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16))) +svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8))) +svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64))) +svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32))) +svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16))) 
+svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32))) +svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) +svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8))) +svmfloat8_t svld1q_gather_offset(svbool_t, mfloat8_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) +svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) +svuint64_t svld1udq(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64))) +svfloat64_t svld1udq(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64))) +svint64_t svld1udq(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64))) +svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64))) +svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64))) +svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32))) +svuint32_t svld1uwq(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32))) +svfloat32_t svld1uwq(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32))) +svint32_t svld1uwq(svbool_t, int32_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32))) +svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) +svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) +svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) +void svst1dq(svbool_t, uint64_t *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) +void svst1dq(svbool_t, float64_t *, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) +void svst1dq(svbool_t, int64_t *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) +void svst1dq_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) +void svst1dq_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) +void svst1dq_vnum(svbool_t, int64_t *, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) +void svst1q_scatter(svbool_t, svuint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) +void svst1q_scatter(svbool_t, svuint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64))) +void svst1q_scatter(svbool_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16))) +void svst1q_scatter(svbool_t, svuint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16))) +void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8))) +void svst1q_scatter(svbool_t, svuint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64))) +void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32))) +void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16))) +void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32))) +void svst1q_scatter(svbool_t, svuint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) +void svst1q_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8))) +void svst1q_scatter(svbool_t, svuint64_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) +void svst1q_scatter(svbool_t, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64))) +void svst1q_scatter_index(svbool_t, 
svuint64_t, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16))) +void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) +void svst1q_scatter_index(svbool_t, uint32_t *, svint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64))) +void svst1q_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16))) +void svst1q_scatter_index(svbool_t, uint16_t *, svint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16))) +void svst1q_scatter_index(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64))) +void svst1q_scatter_index(svbool_t, float64_t 
*, svint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32))) +void svst1q_scatter_index(svbool_t, float32_t *, svint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16))) +void svst1q_scatter_index(svbool_t, float16_t *, svint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32))) +void svst1q_scatter_index(svbool_t, int32_t *, svint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64))) +void svst1q_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16))) +void svst1q_scatter_index(svbool_t, int16_t *, svint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) +void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) +void svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16))) +void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16))) +void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64))) +void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32))) +void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16))) +void svst1q_scatter_index(svbool_t, 
float16_t *, svuint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32))) +void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64))) +void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) +void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8))) +void svst1q_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32))) +void svst1q_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64))) +void svst1q_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16))) +void svst1q_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16))) +void svst1q_scatter_offset(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) +void svst1q_scatter_offset(svbool_t, int8_t *, svint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64))) +void svst1q_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32))) +void svst1q_scatter_offset(svbool_t, float32_t *, svint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16))) +void 
svst1q_scatter_offset(svbool_t, float16_t *, svint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32))) +void svst1q_scatter_offset(svbool_t, int32_t *, svint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) +void svst1q_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8))) +void svst1q_scatter_offset(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) +void svst1q_scatter_offset(svbool_t, int16_t *, svint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) +void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) +void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64))) +void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16))) +void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16))) +void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8))) +void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64))) +void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32))) +void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16))) +void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32))) +void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) +void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8))) +void svst1q_scatter_offset(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) +void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) +void svst1wq(svbool_t, uint32_t *, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) +void svst1wq(svbool_t, float32_t *, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) +void svst1wq(svbool_t, int32_t *, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) +void svst1wq_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) +void svst1wq_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) +void svst1wq_vnum(svbool_t, int32_t *, int64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8))) +uint64_t svcntp_c8(svcount_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32))) +uint64_t svcntp_c32(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64))) +uint64_t svcntp_c64(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16))) +uint64_t svcntp_c16(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) +svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) +svint8x2_t svld1_s8_x2(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2))) +svmfloat8x2_t svld1_mf8_x2(svcount_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) +svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) +svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) +svint64x2_t svld1_s64_x2(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) +svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) +svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) +svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) +svint16x2_t svld1_s16_x2(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) +svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) +svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) +svint32x2_t svld1_s32_x2(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) +svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) +svint8x4_t svld1_s8_x4(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4))) +svmfloat8x4_t svld1_mf8_x4(svcount_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) +svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) +svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) +svint64x4_t svld1_s64_x4(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) +svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) +svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) +svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) +svint16x4_t svld1_s16_x4(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) +svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) +svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) +svint32x4_t svld1_s32_x4(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) +svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t 
const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) +svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2))) +svmfloat8x2_t svld1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) +svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) +svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) +svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) +svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) +svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) +svfloat16x2_t svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) +svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) +svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) +svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) +svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) +svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, 
int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) +svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4))) +svmfloat8x4_t svld1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) +svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) +svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) +svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) +svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) +svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) +svfloat16x4_t svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) +svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) +svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) +svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) +svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) +svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) +svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2))) +svmfloat8x2_t svldnt1_mf8_x2(svcount_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) +svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) +svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) +svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) +svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) +svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) +svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) +svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) +svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) +svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) +svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) +svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) +svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4))) 
+svmfloat8x4_t svldnt1_mf8_x4(svcount_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) +svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) +svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) +svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) +svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) +svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) +svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) +svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) +svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) +svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) +svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) +svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) +svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2))) +svmfloat8x2_t svldnt1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) 
+svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) +svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) +svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) +svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) +svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) +svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) +svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) +svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) +svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) +svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) +svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) +svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4))) +svmfloat8x4_t svldnt1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) +svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) +svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) +svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) +svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) +svbfloat16x4_t svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) +svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) +svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) +svuint32x4_t svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) +svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) +svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8))) +svbool_t svpext_lane_c8(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32))) +svbool_t svpext_lane_c32(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64))) +svbool_t svpext_lane_c64(svcount_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16))) +svbool_t svpext_lane_c16(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2))) +svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2))) +svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2))) +svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2))) +svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c))) +svcount_t svpfalse_c(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16))) +svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32))) +svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64))) +svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8))) +svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8))) +svcount_t svptrue_c8(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32))) +svcount_t svptrue_c32(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64))) +svcount_t svptrue_c64(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16))) +svcount_t svptrue_c16(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) +svbool_t svreinterpret_b(svcount_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) +svcount_t svreinterpret_c(svbool_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) +void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) +void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2))) +void svst1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) +void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) +void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) +void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) +void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) +void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) +void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) +void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) +void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) +void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) +void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) +void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) +void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4))) +void svst1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) +void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) +void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) +void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) +void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) +void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) +void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) +void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) +void svst1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) +void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) +void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) +void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) +void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2))) +void svst1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) +void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) +void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) +void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) +void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) +void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) +void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) +void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) +void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) +void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) +void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) +void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) +void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4))) +void svst1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) +void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) +void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) +void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) +void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) +void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) +void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) +void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) +void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) +void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) +void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) +void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) +void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2))) +void svstnt1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) +void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) +void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) +void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) +void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) +void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) +void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) +void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) +void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) +void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) +void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) +void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) +void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4))) +void svstnt1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) +void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) +void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) +void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) +void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) +void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) +void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) +void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) +void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) +void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) +void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) +void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) +void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2))) +void svstnt1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) +void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) +void svstnt1_vnum_f64_x2(svcount_t, float64_t *, 
int64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) +void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) +void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) +void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) +void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) +void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) +void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) +void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) +void svstnt1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) +void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) +void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4))) +void svstnt1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) +void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) +void 
svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) +void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) +void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) +void svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) +void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) +void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) +void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) +void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) +void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) +svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) +svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) +svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) +svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) +svcount_t 
svwhilege_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) +svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) +svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) +svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) +svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) +svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) +svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) +svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) +svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) +svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) +svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) +svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) +svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) +svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) +svcount_t svwhilele_c64_s64(int64_t, 
int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) +svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) +svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) +svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) +svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) +svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) +svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) +svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) +svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) +svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) +svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) +svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) +svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) +svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) +svuint8x2_t svld1_x2(svcount_t, uint8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) +svint8x2_t svld1_x2(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2))) +svmfloat8x2_t svld1_x2(svcount_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) +svuint64x2_t svld1_x2(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) +svfloat64x2_t svld1_x2(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2))) +svint64x2_t svld1_x2(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2))) +svuint16x2_t svld1_x2(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2))) +svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2))) +svfloat16x2_t svld1_x2(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2))) +svint16x2_t svld1_x2(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2))) +svuint32x2_t svld1_x2(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2))) +svfloat32x2_t svld1_x2(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2))) +svint32x2_t svld1_x2(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) +svuint8x4_t svld1_x4(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) +svint8x4_t svld1_x4(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4))) +svmfloat8x4_t svld1_x4(svcount_t, mfloat8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) +svuint64x4_t svld1_x4(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) +svfloat64x4_t svld1_x4(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4))) +svint64x4_t svld1_x4(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4))) +svuint16x4_t svld1_x4(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4))) +svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4))) +svfloat16x4_t svld1_x4(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4))) +svint16x4_t svld1_x4(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4))) +svuint32x4_t svld1_x4(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4))) +svfloat32x4_t svld1_x4(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4))) +svint32x4_t svld1_x4(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) +svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) +svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2))) +svmfloat8x2_t svld1_vnum_x2(svcount_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) +svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) +svfloat64x2_t 
svld1_vnum_x2(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2))) +svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2))) +svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2))) +svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2))) +svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2))) +svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2))) +svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2))) +svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2))) +svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) +svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) +svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4))) +svmfloat8x4_t svld1_vnum_x4(svcount_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) +svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) +svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, 
int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4))) +svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4))) +svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4))) +svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4))) +svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4))) +svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4))) +svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4))) +svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4))) +svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) +svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) +svint8x2_t svldnt1_x2(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2))) +svmfloat8x2_t svldnt1_x2(svcount_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) +svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) +svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2))) +svint64x2_t svldnt1_x2(svcount_t, 
int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2))) +svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2))) +svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2))) +svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2))) +svint16x2_t svldnt1_x2(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2))) +svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2))) +svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2))) +svint32x2_t svldnt1_x2(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) +svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) +svint8x4_t svldnt1_x4(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4))) +svmfloat8x4_t svldnt1_x4(svcount_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) +svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) +svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4))) +svint64x4_t svldnt1_x4(svcount_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4))) +svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4))) 
+svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4))) +svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4))) +svint16x4_t svldnt1_x4(svcount_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4))) +svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4))) +svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4))) +svint32x4_t svldnt1_x4(svcount_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) +svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) +svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2))) +svmfloat8x2_t svldnt1_vnum_x2(svcount_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) +svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) +svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2))) +svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2))) +svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2))) +svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2))) +svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2))) +svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2))) +svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2))) +svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2))) +svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) +svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) +svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4))) +svmfloat8x4_t svldnt1_vnum_x4(svcount_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) +svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) +svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4))) +svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4))) +svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4))) +svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t); 
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4))) +svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4))) +svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4))) +svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4))) +svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) +svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) +svbool_t svreinterpret(svcount_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) +svcount_t svreinterpret(svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) +void svst1(svcount_t, uint8_t *, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) +void svst1(svcount_t, int8_t *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2))) +void svst1(svcount_t, mfloat8_t *, svmfloat8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) +void svst1(svcount_t, uint64_t *, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) +void svst1(svcount_t, float64_t *, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2))) +void svst1(svcount_t, int64_t *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2))) +void svst1(svcount_t, uint16_t *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2))) +void svst1(svcount_t, 
bfloat16_t *, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2))) +void svst1(svcount_t, float16_t *, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2))) +void svst1(svcount_t, int16_t *, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2))) +void svst1(svcount_t, uint32_t *, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2))) +void svst1(svcount_t, float32_t *, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2))) +void svst1(svcount_t, int32_t *, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) +void svst1(svcount_t, uint8_t *, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) +void svst1(svcount_t, int8_t *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4))) +void svst1(svcount_t, mfloat8_t *, svmfloat8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) +void svst1(svcount_t, uint64_t *, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) +void svst1(svcount_t, float64_t *, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4))) +void svst1(svcount_t, int64_t *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4))) +void svst1(svcount_t, uint16_t *, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4))) +void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4))) +void svst1(svcount_t, float16_t *, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4))) +void svst1(svcount_t, int16_t *, svint16x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4))) +void svst1(svcount_t, uint32_t *, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4))) +void svst1(svcount_t, float32_t *, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4))) +void svst1(svcount_t, int32_t *, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) +void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) +void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2))) +void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) +void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) +void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2))) +void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2))) +void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2))) +void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2))) +void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2))) +void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2))) +void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2))) +void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2))) +void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) +void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) +void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4))) +void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) +void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) +void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4))) +void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4))) +void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4))) +void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4))) +void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4))) +void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4))) +void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4))) +void 
svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4))) +void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) +void svstnt1(svcount_t, uint8_t *, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) +void svstnt1(svcount_t, int8_t *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2))) +void svstnt1(svcount_t, mfloat8_t *, svmfloat8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) +void svstnt1(svcount_t, uint64_t *, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) +void svstnt1(svcount_t, float64_t *, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2))) +void svstnt1(svcount_t, int64_t *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2))) +void svstnt1(svcount_t, uint16_t *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2))) +void svstnt1(svcount_t, bfloat16_t *, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2))) +void svstnt1(svcount_t, float16_t *, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2))) +void svstnt1(svcount_t, int16_t *, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2))) +void svstnt1(svcount_t, uint32_t *, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2))) +void svstnt1(svcount_t, float32_t *, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2))) +void svstnt1(svcount_t, int32_t *, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) 
+void svstnt1(svcount_t, uint8_t *, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) +void svstnt1(svcount_t, int8_t *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4))) +void svstnt1(svcount_t, mfloat8_t *, svmfloat8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) +void svstnt1(svcount_t, uint64_t *, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) +void svstnt1(svcount_t, float64_t *, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4))) +void svstnt1(svcount_t, int64_t *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4))) +void svstnt1(svcount_t, uint16_t *, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4))) +void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4))) +void svstnt1(svcount_t, float16_t *, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4))) +void svstnt1(svcount_t, int16_t *, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4))) +void svstnt1(svcount_t, uint32_t *, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4))) +void svstnt1(svcount_t, float32_t *, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4))) +void svstnt1(svcount_t, int32_t *, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) +void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) +void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2))) +void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) +void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) +void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2))) +void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2))) +void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2))) +void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2))) +void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2))) +void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2))) +void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2))) +void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2))) +void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4))) +void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) +void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4))) +void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) +void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) +void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4))) +void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4))) +void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4))) +void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4))) +void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4))) +void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4))) +void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4))) +void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) +void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) +svcount_t svwhilege_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) +svcount_t svwhilege_c32(int64_t, int64_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64))) +svcount_t svwhilege_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64))) +svcount_t svwhilege_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64))) +svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64))) +svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) +svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) +svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) +svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) +svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64))) +svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64))) +svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64))) +svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64))) +svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) +svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) +svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) +svcount_t svwhilele_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) +svcount_t svwhilele_c32(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64))) +svcount_t svwhilele_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64))) +svcount_t svwhilele_c16(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64))) +svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64))) +svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) +svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) +svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) +svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) +svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64))) +svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64))) +svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64))) +svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64))) +svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) +svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) +svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) svfloat64_t svabd_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m))) @@ -31877,6 +32233,30 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) svfloat32_t svexpa(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) svfloat16_t svexpa(svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) +svuint32_t svcompact_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) +svuint64_t svcompact_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) +svfloat64_t svcompact_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) +svfloat32_t svcompact_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) +svint32_t svcompact_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) +svint64_t svcompact_s64(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) +svuint32_t svcompact(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) +svuint64_t svcompact(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) +svfloat64_t svcompact(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) +svfloat32_t svcompact(svbool_t, svfloat32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) +svint32_t svcompact(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) +svint64_t svcompact(svbool_t, svint64_t); #define svcvtnt_bf16_x svcvtnt_bf16_m #define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m #define svcvtnt_f16_x svcvtnt_f16_m diff --git a/lib/include/avx10_2_512bf16intrin.h b/lib/include/avx10_2_512bf16intrin.h index 75290d22ef..3e9f27443e 100644 --- a/lib/include/avx10_2_512bf16intrin.h +++ b/lib/include/avx10_2_512bf16intrin.h @@ -21,9 +21,15 @@ typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(512))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#endif + static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) { return __builtin_bit_cast(__m512bh, _mm512_setzero_ps()); } @@ -167,13 +173,13 @@ _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) { (__v32bf)__A); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) { return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, (__v32hi)__B); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } @@ -423,7 +429,7 @@ _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { 
(__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U))) static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) { - return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A); + return __builtin_elementwise_sqrt(__A); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -441,8 +447,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -469,8 +475,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -497,8 +503,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( @@ -527,8 +533,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B, + -(__v32bf)__C); } 
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( @@ -555,6 +561,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh( (__v32bf)_mm512_setzero_pbh()); } +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR #undef __DEFAULT_FN_ATTRS512 #endif diff --git a/lib/include/avx10_2_512convertintrin.h b/lib/include/avx10_2_512convertintrin.h index ee8cbf28ca..ffaed08cee 100644 --- a/lib/include/avx10_2_512convertintrin.h +++ b/lib/include/avx10_2_512convertintrin.h @@ -18,7 +18,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(512))) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A, diff --git a/lib/include/avx10_2_512niintrin.h b/lib/include/avx10_2_512niintrin.h index 7e614f7740..b2215b72c5 100644 --- a/lib/include/avx10_2_512niintrin.h +++ b/lib/include/avx10_2_512niintrin.h @@ -17,7 +17,7 @@ #define __AVX10_2_512NIINTRIN_H #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(512))) /* VNNI FP16 */ @@ -64,8 +64,8 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U, static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v64qi)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -84,8 +84,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W, __m512i __A, __m512i __B) { - return 
(__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v64qi)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32( @@ -104,8 +104,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v64qi)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -124,8 +124,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v64qi)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32( @@ -144,8 +144,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v64qu)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -164,8 +164,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v64qu)__A, + (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS 
_mm512_mask_dpbuuds_epi32( @@ -185,8 +185,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v32hi)__B, + (__v32hu)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -197,7 +197,7 @@ _mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -206,8 +206,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v32hi)__B, + (__v32hu)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32( @@ -218,7 +218,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32( } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -227,8 +227,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A, __m512i __B, __m512i __C) { - return 
(__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v32hu)__B, + (__v32hi)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -239,7 +239,7 @@ _mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -248,8 +248,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v32hu)__B, + (__v32hi)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32( @@ -260,7 +260,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32( } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -269,8 +269,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v32hu)__B, + (__v32hu)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -281,7 +281,7 @@ 
_mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); @@ -290,8 +290,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32( static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v32hu)__B, + (__v32hu)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32( @@ -302,7 +302,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32( } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32( - __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512( (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C), (__v16si)_mm512_setzero_si512()); diff --git a/lib/include/avx10_2_512satcvtdsintrin.h b/lib/include/avx10_2_512satcvtdsintrin.h index 012a6282b5..3688f4c0df 100644 --- a/lib/include/avx10_2_512satcvtdsintrin.h +++ b/lib/include/avx10_2_512satcvtdsintrin.h @@ -16,7 +16,7 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(512))) // 512 bit : Double -> Int diff --git a/lib/include/avx10_2bf16intrin.h b/lib/include/avx10_2bf16intrin.h index 66797ae00f..179ec53402 100644 --- a/lib/include/avx10_2bf16intrin.h +++ b/lib/include/avx10_2bf16intrin.h @@ -21,12 +21,20 @@ typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(128))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) { return __builtin_bit_cast(__m256bh, _mm256_setzero_ps()); } @@ -213,12 +221,12 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a, return __a; } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR 
_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) { return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), _mm_setzero_pbh()); @@ -287,24 +295,24 @@ _mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) { (__v16bf)__A); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) { return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi)__B); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) { return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutexvar_pbh(__m128i __A, __m128bh __B) { return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } @@ -519,34 +527,34 @@ _mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomisbf16eq((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh __A, + __m128bh __B) { + return __builtin_ia32_vcomisbf16eq((__v8bf)__A, (__v8bf)__B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomisbf16lt((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh __A, + __m128bh __B) { + return 
__builtin_ia32_vcomisbf16lt((__v8bf)__A, (__v8bf)__B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomisbf16le((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh __A, + __m128bh __B) { + return __builtin_ia32_vcomisbf16le((__v8bf)__A, (__v8bf)__B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomisbf16gt((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh __A, + __m128bh __B) { + return __builtin_ia32_vcomisbf16gt((__v8bf)__A, (__v8bf)__B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomisbf16ge((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh __A, + __m128bh __B) { + return __builtin_ia32_vcomisbf16ge((__v8bf)__A, (__v8bf)__B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomisbf16neq((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh __A, + __m128bh __B) { + return __builtin_ia32_vcomisbf16neq((__v8bf)__A, (__v8bf)__B); } #define _mm256_cmp_pbh_mask(__A, __B, __P) \ @@ -818,7 +826,7 @@ _mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U))) static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) { - return (__m256bh)__builtin_ia32_vsqrtbf16256((__v16bf)__A); + return __builtin_elementwise_sqrt(__A); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -835,7 +843,7 @@ _mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) { - return (__m128bh)__builtin_ia32_vsqrtbf16((__v8bf)__A); + return __builtin_elementwise_sqrt(__A); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -852,8 
+860,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -880,8 +888,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -908,8 +916,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh( @@ -938,8 +946,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh( @@ -969,8 +977,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return 
(__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -997,8 +1005,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1025,8 +1033,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1053,8 +1061,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1080,6 +1088,7 @@ _mm_maskz_fnmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 - +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif #endif diff --git a/lib/include/avx10_2convertintrin.h b/lib/include/avx10_2convertintrin.h index 19d91d41f7..2800ee7311 100644 --- a/lib/include/avx10_2convertintrin.h +++ b/lib/include/avx10_2convertintrin.h @@ 
-18,10 +18,10 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(256))) // clang-format off diff --git a/lib/include/avx10_2copyintrin.h b/lib/include/avx10_2copyintrin.h index 76b8f8ced5..37dc06ac9e 100644 --- a/lib/include/avx10_2copyintrin.h +++ b/lib/include/avx10_2copyintrin.h @@ -16,7 +16,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(128))) /// Constructs a 128-bit integer vector, setting the lower 32 bits to the diff --git a/lib/include/avx10_2niintrin.h b/lib/include/avx10_2niintrin.h index 992be18f77..9a772ec434 100644 --- a/lib/include/avx10_2niintrin.h +++ b/lib/include/avx10_2niintrin.h @@ -16,10 +16,10 @@ #define __AVX10_2NIINTRIN_H #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(256))) /* VNNI FP16 */ @@ -253,7 +253,7 @@ _mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { 
+_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -266,7 +266,7 @@ _mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { +_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -279,7 +279,7 @@ _mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -292,7 +292,7 @@ _mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32( - __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -305,7 +305,7 @@ _mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -318,7 +318,7 @@ 
_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { +_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -331,7 +331,7 @@ _mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -344,7 +344,7 @@ _mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32( - __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -357,7 +357,7 @@ _mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -370,7 +370,7 @@ _mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { 
+_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); @@ -383,7 +383,7 @@ _mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { +_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), (__v4si)_mm_setzero_si128()); @@ -396,7 +396,7 @@ _mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32( - __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), (__v8si)_mm256_setzero_si256()); diff --git a/lib/include/avx10_2satcvtdsintrin.h b/lib/include/avx10_2satcvtdsintrin.h index cc840368c3..57d299606a 100644 --- a/lib/include/avx10_2satcvtdsintrin.h +++ b/lib/include/avx10_2satcvtdsintrin.h @@ -17,11 +17,11 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \ __min_vector_width__(128))) #define _mm_cvtts_roundsd_i32(__A, __R) \ diff --git a/lib/include/avx2intrin.h b/lib/include/avx2intrin.h index dc9fc07314..d3ceb2327a 100644 --- a/lib/include/avx2intrin.h +++ b/lib/include/avx2intrin.h @@ -15,20 +15,19 @@ #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx2,no-evex512"), __min_vector_width__(256))) -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx2,no-evex512"), __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avx2"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avx2"), \ __min_vector_width__(128))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif /* SSE4 Multiple Packed Sums of Absolute Difference. */ @@ -104,10 +103,9 @@ /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi8(__m256i __a) -{ - return (__m256i)__builtin_elementwise_abs((__v32qs)__a); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi8(__m256i __a) { + return (__m256i)__builtin_elementwise_abs((__v32qs)__a); } /// Computes the absolute value of each signed 16-bit element in the 256-bit @@ -121,10 +119,9 @@ _mm256_abs_epi8(__m256i __a) /// \param __a /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi16(__m256i __a) -{ - return (__m256i)__builtin_elementwise_abs((__v16hi)__a); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi16(__m256i __a) { + return (__m256i)__builtin_elementwise_abs((__v16hi)__a); } /// Computes the absolute value of each signed 32-bit element in the 256-bit @@ -138,10 +135,9 @@ _mm256_abs_epi16(__m256i __a) /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi32(__m256i __a) -{ - return (__m256i)__builtin_elementwise_abs((__v8si)__a); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi32(__m256i __a) { + return (__m256i)__builtin_elementwise_abs((__v8si)__a); } /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit @@ -169,9 +165,8 @@ _mm256_abs_epi32(__m256i __a) /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } @@ -201,9 +196,8 @@ _mm256_packs_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packs_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } @@ -232,9 +226,8 @@ _mm256_packs_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packus_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } @@ -264,9 +257,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_packus_epi32(__m256i __V1, __m256i __V2) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } @@ -283,9 +275,8 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_add_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } @@ -302,9 +293,8 @@ _mm256_add_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_add_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } @@ -321,9 +311,8 @@ _mm256_add_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_add_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } @@ -340,9 +329,8 @@ _mm256_add_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_add_epi64(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } @@ -359,9 +347,8 @@ _mm256_add_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b); } @@ -377,9 +364,8 @@ _mm256_adds_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b); } @@ -396,9 +382,8 @@ _mm256_adds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b); } @@ -414,9 +399,8 @@ _mm256_adds_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b); } @@ -460,7 +444,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); @@ -478,7 +462,7 @@ _mm256_and_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_andnot_si256(__m256i __a, __m256i __b) { return (__m256i)(~(__v4du)__a & (__v4du)__b); @@ -504,10 +488,9 @@ _mm256_andnot_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_avg_epu8(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_avg_epu8(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b); } /// Computes the averages of the corresponding unsigned 16-bit integers in @@ -530,10 +513,9 @@ _mm256_avg_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_avg_epu16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_avg_epu16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b); } /// Merges 8-bit integer values from either of the two 256-bit vectors @@ -565,9 +547,8 @@ _mm256_avg_epu16(__m256i __a, __m256i __b) /// is 0, the byte is copied from \a __V1; otherwise, it is copied from /// \a __V2. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) { return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, (__v32qi)__M); } @@ -633,7 +614,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); @@ -659,7 +640,7 @@ _mm256_cmpeq_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); @@ -685,7 +666,7 @@ _mm256_cmpeq_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); @@ -711,7 +692,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a == (__v4di)__b); @@ -737,7 +718,7 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { /* This function always performs a signed comparison, but __v32qi is a char @@ -765,7 +746,7 @@ _mm256_cmpgt_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); @@ -791,7 +772,7 @@ _mm256_cmpgt_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); @@ -817,7 +798,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a > (__v4di)__b); @@ -853,10 +834,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit @@ -885,10 +865,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit @@ -920,10 +899,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadds_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadds_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -956,10 +934,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit @@ -988,10 +965,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -1024,10 +1000,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a @@ -1054,10 +1029,9 @@ _mm256_hsubs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maddubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maddubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); } /// Multiplies corresponding 16-bit elements of two 256-bit vectors of @@ -1086,9 +1060,8 @@ _mm256_maddubs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_madd_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_madd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); } @@ -1105,9 +1078,8 @@ _mm256_madd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b); } @@ -1124,9 +1096,8 @@ _mm256_max_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b); } @@ -1143,9 +1114,8 @@ _mm256_max_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b); } @@ -1162,9 +1132,8 @@ _mm256_max_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b); } @@ -1181,9 +1150,8 @@ _mm256_max_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b); } @@ -1200,9 +1168,8 @@ _mm256_max_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epu32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b); } @@ -1219,9 +1186,8 @@ _mm256_max_epu32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b); } @@ -1238,9 +1204,8 @@ _mm256_min_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b); } @@ -1257,9 +1222,8 @@ _mm256_min_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b); } @@ -1276,9 +1240,8 @@ _mm256_min_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b); } @@ -1295,9 +1258,8 @@ _mm256_min_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b); } @@ -1314,9 +1276,8 @@ _mm256_min_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epu32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); } @@ -1337,9 +1298,8 @@ _mm256_min_epu32(__m256i __a, __m256i __b) /// \param __a /// A 256-bit integer vector containing the source bytes. /// \returns The 32-bit integer mask. 
-static __inline__ int __DEFAULT_FN_ATTRS256 -_mm256_movemask_epi8(__m256i __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } @@ -1363,9 +1323,8 @@ _mm256_movemask_epi8(__m256i __a) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi16(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); @@ -1391,9 +1350,8 @@ _mm256_cvtepi8_epi16(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); @@ -1418,9 +1376,8 @@ _mm256_cvtepi8_epi32(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); @@ -1446,9 +1403,8 @@ _mm256_cvtepi8_epi64(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } @@ -1471,9 +1427,8 @@ _mm256_cvtepi16_epi32(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di); } @@ -1496,9 +1451,8 @@ _mm256_cvtepi16_epi64(__m128i __V) /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } @@ -1522,9 +1476,8 @@ _mm256_cvtepi32_epi64(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the zero-extended /// values. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi16(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } @@ -1548,9 +1501,8 @@ _mm256_cvtepu8_epi16(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } @@ -1573,9 +1525,8 @@ _mm256_cvtepu8_epi32(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } @@ -1599,9 +1550,8 @@ _mm256_cvtepu8_epi64(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu16_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } @@ -1624,9 +1574,8 @@ _mm256_cvtepu16_epi32(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu16_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } @@ -1649,9 +1598,8 @@ _mm256_cvtepu16_epi64(__m128i __V) /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } @@ -1675,9 +1623,8 @@ _mm256_cvtepu32_epi64(__m128i __V) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mul_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mul_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); } @@ -1702,9 +1649,8 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the rounded products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mulhrs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); } @@ -1721,10 +1667,9 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mulhi_epu16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mulhi_epu16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b); } /// Multiplies signed 16-bit integer elements of two 256-bit vectors of @@ -1740,7 +1685,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); @@ -1759,7 +1704,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); @@ -1778,9 +1723,8 @@ _mm256_mullo_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mullo_epi32 (__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mullo_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a * (__v8su)__b); } @@ -1804,9 +1748,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. 
/// \returns A 256-bit vector of [4 x i64] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mul_epu32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } @@ -1822,7 +1765,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); @@ -1906,9 +1849,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b) /// control byte specify the index (within the same 128-bit half) of \a __a /// to copy to the result byte. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shuffle_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); } @@ -2033,10 +1975,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector]. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi8(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi8(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } /// Sets each element of the result to the corresponding element of the @@ -2054,10 +1995,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } /// Sets each element of the result to the corresponding element of the @@ -2075,10 +2015,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } /// Shifts each 128-bit half of the 256-bit integer vector \a a left by @@ -2098,8 +2037,9 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. -#define _mm256_slli_si256(a, imm) \ - ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))) +#define _mm256_slli_si256(a, imm) \ + ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v32qi)(__m256i)(a), \ + (int)(imm))) /// Shifts each 128-bit half of the 256-bit integer vector \a a left by /// \a imm bytes, shifting in zero bytes, and returns the result. If \a imm @@ -2118,8 +2058,9 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. 
-#define _mm256_bslli_epi128(a, imm) \ - ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))) +#define _mm256_bslli_epi128(a, imm) \ + ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v32qi)(__m256i)(a), \ + (int)(imm))) /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// left by \a __count bits, shifting in zero bits, and returns the result. @@ -2134,9 +2075,8 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } @@ -2155,9 +2095,8 @@ _mm256_slli_epi16(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sll_epi16(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } @@ -2174,9 +2113,8 @@ _mm256_sll_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } @@ -2195,9 +2133,8 @@ _mm256_slli_epi32(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sll_epi32(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } @@ -2214,9 +2151,8 @@ _mm256_sll_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi64(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } @@ -2235,9 +2171,8 @@ _mm256_slli_epi64(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sll_epi64(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } @@ -2255,9 +2190,8 @@ _mm256_sll_epi64(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). 
/// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } @@ -2277,9 +2211,8 @@ _mm256_srai_epi16(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sra_epi16(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } @@ -2297,9 +2230,8 @@ _mm256_sra_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } @@ -2319,9 +2251,8 @@ _mm256_srai_epi32(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sra_epi32(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); } @@ -2342,8 +2273,9 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. -#define _mm256_srli_si256(a, imm) \ - ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))) +#define _mm256_srli_si256(a, imm) \ + ((__m256i)__builtin_ia32_psrldqi256_byteshift((__v32qi)(__m256i)(a), \ + (int)(imm))) /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by /// \a imm bytes, shifting in zero bytes, and returns the result. If @@ -2362,8 +2294,9 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. -#define _mm256_bsrli_epi128(a, imm) \ - ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))) +#define _mm256_bsrli_epi128(a, imm) \ + ((__m256i)__builtin_ia32_psrldqi256_byteshift((__v32qi)(__m256i)(a), \ + (int)(imm))) /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. @@ -2378,9 +2311,8 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } @@ -2399,9 +2331,8 @@ _mm256_srli_epi16(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srl_epi16(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } @@ -2418,9 +2349,8 @@ _mm256_srl_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } @@ -2439,9 +2369,8 @@ _mm256_srli_epi32(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srl_epi32(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } @@ -2458,9 +2387,8 @@ _mm256_srl_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi64(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } @@ -2479,9 +2407,8 @@ _mm256_srli_epi64(__m256i __a, int __count) /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srl_epi64(__m256i __a, __m128i __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256((__v4di)__a, __count); } @@ -2506,9 +2433,8 @@ _mm256_srl_epi64(__m256i __a, __m128i __count) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sub_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } @@ -2533,9 +2459,8 @@ _mm256_sub_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sub_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } @@ -2559,9 +2484,8 @@ _mm256_sub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing the subtrahends. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sub_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } @@ -2585,9 +2509,8 @@ _mm256_sub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sub_epi64(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } @@ -2611,9 +2534,8 @@ _mm256_sub_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b); } @@ -2637,9 +2559,8 @@ _mm256_subs_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b); } @@ -2664,9 +2585,8 @@ _mm256_subs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b); } @@ -2690,9 +2610,8 @@ _mm256_subs_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b); } @@ -2724,9 +2643,8 @@ _mm256_subs_epu16(__m256i __a, __m256i __b) /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } @@ -2759,9 +2677,8 @@ _mm256_unpackhi_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } @@ -2793,9 +2710,8 @@ _mm256_unpackhi_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } @@ -2823,9 +2739,8 @@ _mm256_unpackhi_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi64(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } @@ -2857,9 +2772,8 @@ _mm256_unpackhi_epi64(__m256i __a, __m256i __b) /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } @@ -2892,9 +2806,8 @@ _mm256_unpacklo_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } @@ -2926,9 +2839,8 @@ _mm256_unpacklo_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } @@ -2956,9 +2868,8 @@ _mm256_unpacklo_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi64(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } @@ -2974,7 +2885,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); @@ -3009,9 +2920,8 @@ _mm256_stream_load_si256(const void *__V) /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x float] containing the result. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_broadcastss_ps(__m128 __X) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } @@ -3026,9 +2936,8 @@ _mm_broadcastss_ps(__m128 __X) /// \param __a /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x double] containing the result. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_broadcastsd_pd(__m128d __a) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } @@ -3043,9 +2952,8 @@ _mm_broadcastsd_pd(__m128d __a) /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x float] containing the result. -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_broadcastss_ps(__m128 __X) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3060,9 +2968,8 @@ _mm256_broadcastss_ps(__m128 __X) /// \param __X /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x double] containing the result. -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_broadcastsd_pd(__m128d __X) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } @@ -3076,9 +2983,8 @@ _mm256_broadcastsd_pd(__m128d __X) /// \param __X /// A 128-bit integer vector to be broadcast. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastsi128_si256(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } @@ -3168,9 +3074,8 @@ _mm256_broadcastsi128_si256(__m128i __X) /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastb_epi8(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3184,9 +3089,8 @@ _mm256_broadcastb_epi8(__m128i __X) /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastw_epi16(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3200,9 +3104,8 @@ _mm256_broadcastw_epi16(__m128i __X) /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastd_epi32(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3216,9 +3119,8 @@ _mm256_broadcastd_epi32(__m128i __X) /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastq_epi64(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } @@ -3232,9 +3134,8 @@ _mm256_broadcastq_epi64(__m128i __X) /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. 
/// \returns A 128-bit integer vector containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastb_epi8(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3248,9 +3149,8 @@ _mm_broadcastb_epi8(__m128i __X) /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 128-bit vector of [8 x i16] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastw_epi16(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3264,9 +3164,8 @@ _mm_broadcastw_epi16(__m128i __X) /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastd_epi32(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } @@ -3280,9 +3179,8 @@ _mm_broadcastd_epi32(__m128i __X) /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastq_epi64(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } @@ -3308,9 +3206,8 @@ _mm_broadcastq_epi64(__m128i __X) /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } @@ -3366,9 +3263,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x float] containing the result. -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } @@ -3756,7 +3652,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); @@ -3778,7 +3674,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); @@ -3800,7 +3696,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); @@ -3822,7 +3718,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y) /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); @@ -3845,7 +3741,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); @@ -3868,7 +3764,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); @@ -3890,7 +3786,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); @@ -3912,7 +3808,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); @@ -3934,7 +3830,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); @@ -3956,7 +3852,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y) /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); @@ -5289,5 +5185,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif /* __AVX2INTRIN_H */ diff --git a/lib/include/avx512bf16intrin.h b/lib/include/avx512bf16intrin.h index b28d2e243f..458d1f8b99 100644 --- a/lib/include/avx512bf16intrin.h +++ b/lib/include/avx512bf16intrin.h @@ -19,12 +19,19 @@ typedef __bf16 __v32bf __attribute__((__vector_size__(64), __aligned__(64))); typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead"))); -#define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \ +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bf16,no-evex512"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif /// Convert One BF16 Data to One Single Float Data. /// @@ -36,8 +43,8 @@ typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead"))); /// A bfloat data. /// \returns A float data whose sign field and exponent field keep unchanged, /// and fraction field is extended to 23 bits. 
-static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bf16 __A) { - return __builtin_ia32_cvtsbf162ss_32(__A); +static __inline__ float __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsbh_ss(__bf16 __A) { + return (float)(__A); } /// Convert Two Packed Single Data to One Packed BF16 Data. @@ -236,9 +243,9 @@ _mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) { /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A -static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) { - return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( - (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtpbh_ps(__m256bh __A) { + return (__m512) __builtin_convertvector(__A, __v16sf); } /// Convert Packed BF16 Data to Packed float Data using zeroing mask. @@ -251,10 +258,11 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) { /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) { - return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( - (__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A), 16)); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_cvtpbh_ps(__A), + (__v16sf)_mm512_setzero_ps()); } /// Convert Packed BF16 Data to Packed float Data using merging mask. @@ -269,15 +277,16 @@ _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) { /// \param __A /// A 256-bit vector of [16 x bfloat]. 
/// \returns A 512-bit vector of [16 x float] come from conversion of __A -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) { - return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32( - (__m512i)__S, (__mmask16)__U, - (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_cvtpbh_ps(__A), (__v16sf)__S); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #undef __DEFAULT_FN_ATTRS512 +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR #endif #endif diff --git a/lib/include/avx512bitalgintrin.h b/lib/include/avx512bitalgintrin.h index 3c446b34e7..f5e9b1a84f 100644 --- a/lib/include/avx512bitalgintrin.h +++ b/lib/include/avx512bitalgintrin.h @@ -15,53 +15,44 @@ #define __AVX512BITALGINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bitalg,evex512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \ + __min_vector_width__(512))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \ __min_vector_width__(512))) +#endif -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_popcnt_epi16(__m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) -{ - return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, - (__v32hi) _mm512_popcnt_epi16(__B), - (__v32hi) __A); +_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + 
(__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) -{ - return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), - __U, - __B); +_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { + return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_popcnt_epi8(__m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) -{ - return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, - (__v64qi) _mm512_popcnt_epi8(__B), - (__v64qi) __A); +_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) -{ - return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(), - __U, - __B); +_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { + return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS @@ -80,7 +71,5 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) __B); } - #undef __DEFAULT_FN_ATTRS - #endif diff --git a/lib/include/avx512bwintrin.h b/lib/include/avx512bwintrin.h index c854720de6..cd4663abe7 100644 --- a/lib/include/avx512bwintrin.h +++ b/lib/include/avx512bwintrin.h @@ -19,153 +19,150 @@ typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,evex512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), \ + __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,no-evex512"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) -static __inline __mmask32 __DEFAULT_FN_ATTRS -_knot_mask32(__mmask32 __M) -{ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline __mmask32 + __DEFAULT_FN_ATTRS_CONSTEXPR _knot_mask32(__mmask32 __M) { return __builtin_ia32_knotsi(__M); } -static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { +static __inline __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kand_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kandn_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, 
(__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kxnor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kxor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS 
-_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) -{ +static __inline__ unsigned char 
__DEFAULT_FN_ATTRS_CONSTEXPR +_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) { return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) { return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kadd_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); } @@ -181,22 +178,22 @@ static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, #define _kshiftri_mask64(A, I) \ ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))) -static __inline__ unsigned int __DEFAULT_FN_ATTRS 
-_cvtmask32_u32(__mmask32 __A) { +static __inline__ unsigned int + __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask32_u32(__mmask32 __A) { return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask64_u64(__mmask64 __A) { return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask32(unsigned int __A) { return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu64_mask64(unsigned long long __A) { return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); } @@ -362,168 +359,159 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_add_epi8 (__m512i __A, __m512i __B) { +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sub_epi8 (__m512i __A, __m512i __B) { +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_add_epi16 (__m512i __A, __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_epi16(__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sub_epi16 (__m512i __A, __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_epi16(__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, 
__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A * (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __W, (__v64qi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __W, (__v32hi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 
-_mm512_abs_epi8 (__m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v64qs)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_abs_epi8(__A), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_abs_epi8(__A), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_abs_epi16 (__m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_abs_epi16(__A), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_abs_epi16(__A), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packs_epi32(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packs_epi32(__m512i __A, 
__m512i __B) { return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, @@ -531,21 +519,19 @@ _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packs_epi32(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packs_epi16(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, @@ -553,7 +539,7 @@ _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, @@ -561,13 +547,12 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packus_epi32(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_packus_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, @@ -575,7 +560,7 @@ _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, @@ -583,13 +568,12 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_packus_epi16(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_packus_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, @@ -597,7 +581,7 @@ _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, @@ -605,35 +589,31 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_adds_epi8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_adds_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_adds_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_adds_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_adds_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_adds_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -641,7 +621,7 @@ _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -649,13 +629,12 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_adds_epu8 (__m512i __A, 
__m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_adds_epu8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -663,7 +642,7 @@ _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -671,280 +650,238 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_adds_epu16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_adds_epu16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_adds_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_adds_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 -_mm512_avg_epu8 (__m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_avg_epu8(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)__W); + (__v64qi)_mm512_avg_epu8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_avg_epu16(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_avg_epu16 (__m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i 
__B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi) _mm512_setzero_si512()); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epi8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epu8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epu8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epu8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epu8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epu16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epu16(__m512i __A, __m512i __B) { return 
(__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_min_epi8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_min_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_min_epu8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epu8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epu8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epu8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 
-_mm512_min_epu16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epu16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epu16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_shuffle_epi8(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_shuffle_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), 
(__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_subs_epi8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_subs_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -952,7 +889,7 @@ _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -960,13 +897,12 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_subs_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_subs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -974,7 +910,7 @@ _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -982,13 +918,12 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 
(__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_subs_epu8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_subs_epu8(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -996,7 +931,7 @@ _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, @@ -1004,13 +939,12 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_subs_epu16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_subs_epu16(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1018,7 +952,7 @@ _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1026,113 +960,97 @@ _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, 
__m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), (__v32hi)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mulhrs_epi16(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mulhi_epi16(__m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mulhi_epi16(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_pmulhw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_mulhi_epi16(__A, __B), - (__v32hi)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_mulhi_epi16(__A, __B), - (__v32hi)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mulhi_epu16(__m512i __A, __m512i __B) -{ - return 
(__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mulhi_epu16(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_pmulhuw512((__v32hu)__A, (__v32hu)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_mulhi_epu16(__A, __B), - (__v32hi)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) -{ - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_mulhi_epu16(__A, __B), - (__v32hi)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, @@ -1140,26 +1058,26 @@ _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR 
_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, (__v32hi)_mm512_maddubs_epi16(__X, __Y), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_madd_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_madd_epi16(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_madd_epi16(__A, __B), @@ -1247,7 +1165,7 @@ _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 8, 64+8, 9, 64+9, @@ -1268,21 +1186,21 @@ _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 62, 64+62, 63, 64+63); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i 
__B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 4, 32+4, 5, 32+5, @@ -1295,21 +1213,21 @@ _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 30, 32+30, 31, 32+31); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 0, 64+0, 1, 64+1, @@ -1330,21 +1248,21 @@ _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 54, 64+54, 55, 64+55); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpacklo_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 
(__v64qi)_mm512_unpacklo_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 0, 32+0, 1, 32+1, @@ -1357,67 +1275,60 @@ _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 26, 32+26, 27, 32+27); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi8_epi16(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi8_epi16(__m256i __A) { /* This function always performs a signed extension, but __v32qi is a char which may be signed or unsigned, so use __v32qs. 
*/ return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepi8_epi16(__A), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepi8_epi16(__A), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu8_epi16(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu8_epi16(__m256i __A) { return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepu8_epi16(__A), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepu8_epi16(__A), (__v32hi)_mm512_setzero_si512()); } - #define _mm512_shufflehi_epi16(A, imm) \ ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))) @@ -1450,13 +1361,13 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) (imm)), \ (__v32hi)_mm512_setzero_si512())) -static 
__inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1464,7 +1375,7 @@ _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1472,61 +1383,56 @@ _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sll_epi16(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sll_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sll_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sll_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 -_mm512_slli_epi16(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_slli_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -#define _mm512_bslli_epi128(a, imm) \ - ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) +#define _mm512_bslli_epi128(a, imm) \ + ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v64qi)(__m512i)(a), \ + (int)(imm))) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1534,7 +1440,7 @@ _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1542,13 +1448,13 @@ _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1556,7 +1462,7 @@ _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1564,100 +1470,89 @@ _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sra_epi16(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sra_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sra_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sra_epi16(__mmask32 __U, 
__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sra_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srai_epi16(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srai_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srl_epi16(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srl_epi16(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srl_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srl_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srli_epi16(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srli_epi16(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B), (__v32hi)_mm512_setzero_si512()); } -#define _mm512_bsrli_epi128(a, imm) \ - ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) +#define _mm512_bsrli_epi128(a, imm) \ + ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v64qi)(__m512i)(a), \ + (int)(imm))) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, @@ -1665,23 +1560,21 @@ _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) (__v32hi) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mov_epi16(__mmask32 __U, __m512i 
__A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __A, (__v32hi) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mov_epi8(__m512i __W, __mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __A, (__v64qi) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, @@ -1689,7 +1582,7 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) (__v64qi) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) { return (__m512i) __builtin_ia32_selectb_512(__M, @@ -1697,23 +1590,21 @@ _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) (__v64qi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi8 (__mmask64 __M, char __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi8(__mmask64 __M, char __A) { return (__m512i) __builtin_ia32_selectb_512(__M, (__v64qi) _mm512_set1_epi8(__A), (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackd(__mmask64 __A, __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackw(__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 
(__mmask32) __B); } @@ -1859,33 +1750,28 @@ _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 -_mm512_movepi8_mask (__m512i __A) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi8_mask(__m512i __A) { return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 -_mm512_movepi16_mask (__m512i __A) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi16_mask(__m512i __A) { return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_movm_epi8 (__mmask64 __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movm_epi8(__mmask64 __A) { return (__m512i) __builtin_ia32_cvtmask2b512 (__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_movm_epi16 (__mmask32 __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movm_epi16(__mmask32 __A) { return (__m512i) __builtin_ia32_cvtmask2w512 (__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcastb_epi8 (__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcastb_epi8(__m128i __A) { return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1893,7 +1779,7 @@ _mm512_broadcastb_epi8 (__m128i __A) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectb_512(__M, @@ -1901,15 +1787,14 @@ _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) (__v64qi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 
-_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectb_512(__M, (__v64qi) _mm512_broadcastb_epi8(__A), (__v64qi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) { return (__m512i) __builtin_ia32_selectw_512(__M, @@ -1917,23 +1802,21 @@ _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) (__v32hi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi16 (__mmask32 __M, short __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi16(__mmask32 __M, short __A) { return (__m512i) __builtin_ia32_selectw_512(__M, (__v32hi) _mm512_set1_epi16(__A), (__v32hi) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcastw_epi16 (__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcastw_epi16(__m128i __A) { return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectw_512(__M, @@ -1941,7 +1824,7 @@ _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) (__v32hi) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectw_512(__M, @@ -1949,25 +1832,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) (__v32hi) _mm512_setzero_si512()); 
} -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_permutexvar_epi16(__A, __B), (__v32hi)__W); @@ -2010,5 +1889,7 @@ _mm512_sad_epu8 (__m512i __A, __m512i __B) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/lib/include/avx512cdintrin.h b/lib/include/avx512cdintrin.h index 33b552f6fe..f9de207b76 100644 --- a/lib/include/avx512cdintrin.h +++ b/lib/include/avx512cdintrin.h @@ -15,109 +15,96 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512cd,evex512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \ + __min_vector_width__(512))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \ + __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi64 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); +_mm512_conflict_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ +_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), - (__v8di)__W); + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_conflict_epi64(__A), - (__v8di)_mm512_setzero_si512 ()); +_mm512_conflict_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi32 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); +_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)__W); +_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) { + return (__m512i)__builtin_elementwise_clzg((__v16si)__A, + (__v16si)_mm512_set1_epi32(32)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)_mm512_setzero_si512()); +_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_lzcnt_epi32 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_lzcnt_epi32(__A), - (__v16si)__W); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A) -{ +_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_lzcnt_epi64 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) { + return (__m512i)__builtin_elementwise_clzg( + (__v8di)__A, 
(__v8di)_mm512_set1_epi64((long long)64)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_lzcnt_epi64(__A), - (__v8di)__W); +_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) -{ +_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m512i) _mm512_set1_epi64((long long) __A); +_mm512_broadcastmb_epi64(__mmask8 __A) { + return (__m512i)_mm512_set1_epi64((long long)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcastmw_epi32 (__mmask16 __A) -{ - return (__m512i) _mm512_set1_epi32((int) __A); - +_mm512_broadcastmw_epi32(__mmask16 __A) { + return (__m512i)_mm512_set1_epi32((int)__A); } #undef __DEFAULT_FN_ATTRS diff --git a/lib/include/avx512dqintrin.h b/lib/include/avx512dqintrin.h index 88b48e3a32..084ac89182 100644 --- a/lib/include/avx512dqintrin.h +++ b/lib/include/avx512dqintrin.h @@ -15,110 +15,105 @@ #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), \ + __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512dq,no-evex512"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) -static __inline __mmask8 __DEFAULT_FN_ATTRS -_knot_mask8(__mmask8 __M) -{ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_knot_mask8(__mmask8 __M) { return __builtin_ia32_knotqi(__M); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kand_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kandn_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kxnor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask8(__mmask8 __A, __mmask8 __B) { return 
(__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kxor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_kortestcqi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B); return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_ktestcqi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) { return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B); return (unsigned 
char)__builtin_ia32_ktestzqi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_ktestchi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B); return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kadd_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_kadd_mask16(__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask16(__mmask16 __A, __mmask16 __B) { return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); } @@ -128,12 +123,12 @@ _kadd_mask16(__mmask16 __A, __mmask16 __B) #define _kshiftri_mask8(A, I) \ ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))) -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_cvtmask8_u32(__mmask8 __A) { +static __inline__ unsigned int + __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask8_u32(__mmask8 __A) { return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask8(unsigned int __A) { return 
(__mmask8)__builtin_ia32_kmovb((__mmask8)__A); } @@ -148,26 +143,26 @@ _store_mask8(__mmask8 *__A, __mmask8 __B) { *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mullo_epi64 (__m512i __A, __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mullo_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A ^ (__v8du)__B); } @@ -186,7 +181,7 @@ _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_ps (__m512 __A, __m512 __B) { return (__m512)((__v16su)__A ^ (__v16su)__B); } @@ -205,7 +200,7 @@ _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A | (__v8du)__B); } @@ -224,7 +219,7 @@ _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 
__DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A | (__v16su)__B); } @@ -243,7 +238,7 @@ _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A & (__v8du)__B); } @@ -262,7 +257,7 @@ _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A & (__v16su)__B); } @@ -281,7 +276,7 @@ _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_pd(__m512d __A, __m512d __B) { return (__m512d)(~(__v8du)__A & (__v8du)__B); } @@ -300,7 +295,7 @@ _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_ps(__m512 __A, __m512 __B) { return (__m512)(~(__v16su)__A & (__v16su)__B); } @@ -475,21 +470,20 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepi64_pd (__m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi64_pd(__m512i __A) { return (__m512d)__builtin_convertvector((__v8di)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)_mm512_setzero_pd()); @@ -706,20 +700,20 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepu64_pd (__m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu64_pd(__m512i __A) { return (__m512d)__builtin_convertvector((__v8du)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepu64_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtepu64_pd(__A), (__v8df)_mm512_setzero_pd()); @@ -1052,177 +1046,154 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), (int)(R))) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 -_mm512_movepi32_mask (__m512i __A) -{ +static __inline__ __mmask16 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movepi32_mask(__m512i __A) { return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } -static 
__inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_movm_epi32 (__mmask16 __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movm_epi32(__mmask16 __A) { return (__m512i) __builtin_ia32_cvtmask2d512 (__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_movm_epi64 (__mmask8 __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movm_epi64(__mmask8 __A) { return (__m512i) __builtin_ia32_cvtmask2q512 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 -_mm512_movepi64_mask (__m512i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi64_mask(__m512i __A) { return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); } - -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_broadcast_f32x2 (__m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_f32x2(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_broadcast_f32x8(__m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_f32x8(__m256 __A) { return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, 0, 1, 2, 3, 4, 5, 
6, 7, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_broadcast_f64x2(__m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_f64x2(__m128d __A) { return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcast_i32x2 (__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_i32x2(__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si)__A, 
(__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcast_i32x8(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_i32x8(__m256i __A) { return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcast_i64x2(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_broadcast_i64x2(__m128i __A) { return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)_mm512_setzero_si512()); } -#define _mm512_extractf32x8_ps(A, imm) \ - ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1)) +#define _mm512_extractf32x8_ps(A, imm) \ + ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ @@ -1234,11 +1205,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) -#define _mm512_extractf64x2_pd(A, imm) \ - ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1)) +#define _mm512_extractf64x2_pd(A, imm) \ + ((__m128d)__builtin_ia32_extractf64x2_512_mask( \ + (__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ @@ -1252,10 +1222,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 
__M, __m128i __A) (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) -#define _mm512_extracti32x8_epi32(A, imm) \ - ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1)) +#define _mm512_extracti32x8_epi32(A, imm) \ + ((__m256i)__builtin_ia32_extracti32x8_mask( \ + (__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ @@ -1267,11 +1237,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U))) -#define _mm512_extracti64x2_epi64(A, imm) \ - ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm512_extracti64x2_epi64(A, imm) \ + ((__m128i)__builtin_ia32_extracti64x2_512_mask( \ + (__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ @@ -1375,5 +1344,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h index 45e7eeb532..942ed72686 100644 --- a/lib/include/avx512fintrin.h +++ b/lib/include/avx512fintrin.h @@ -167,22 +167,23 @@ typedef enum } _MM_MANTISSA_SIGN_ENUM; /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \ + __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512f,no-evex512"), __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \ + __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512f,no-evex512"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr #define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif /* Create vectors with repeated elements */ @@ -206,9 +207,7 @@ _mm512_undefined(void) return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_undefined_ps(void) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } @@ -218,48 +217,40 @@ _mm512_undefined_epi32(void) return (__m512i)__builtin_ia32_undef512(); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcastd_epi32 (__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcastd_epi32(__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcastq_epi64 (__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcastq_epi64(__m128i __A) { return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di) _mm512_broadcastq_epi64(__A), - (__v8di) __O); - +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di) _mm512_broadcastq_epi64(__A), (__v8di) _mm512_setzero_si512()); @@ -277,20 +268,20 @@ _mm512_setzero_pd(void) { return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } -static __inline 
__m512 __DEFAULT_FN_ATTRS512 +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w) { return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512d __DEFAULT_FN_ATTRS512 +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w) { return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w) { return __extension__ (__m512i)(__v64qi){ @@ -304,7 +295,7 @@ _mm512_set1_epi8(char __w) __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w) { return __extension__ (__m512i)(__v32hi){ @@ -314,7 +305,7 @@ _mm512_set1_epi16(short __w) __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s) { return __extension__ (__m512i)(__v16si){ @@ -322,81 +313,80 @@ _mm512_set1_epi32(int __s) __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi32(__mmask16 __M, int __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si)_mm512_set1_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d) { return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 
{ return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_set1_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_broadcastss_ps(__m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcastss_ps(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_set4_epi32 (int __A, int __B, int __C, int __D) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set4_epi32(int __A, int __B, int __C, int __D) { return __extension__ (__m512i)(__v16si) { __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_set4_epi64 (long long __A, long long __B, long long __C, - long long __D) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) { return __extension__ (__m512i) (__v8di) { __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_set4_pd (double __A, double __B, double __C, double __D) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set4_pd(double __A, double __B, double __C, double __D) { return __extension__ (__m512d) { __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_set4_ps (float __A, float __B, float __C, float __D) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set4_ps(float __A, float __B, float __C, float __D) { return __extension__ (__m512) { __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A, __D, __C, __B, __A }; } -#define _mm512_setr4_epi32(e0,e1,e2,e3) \ - _mm512_set4_epi32((e3),(e2),(e1),(e0)) +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr4_epi32(int e0, int e1, int e2, int e3) { + return 
_mm512_set4_epi32(e3, e2, e1, e0); +} -#define _mm512_setr4_epi64(e0,e1,e2,e3) \ - _mm512_set4_epi64((e3),(e2),(e1),(e0)) +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) { + return _mm512_set4_epi64(e3, e2, e1, e0); +} -#define _mm512_setr4_pd(e0,e1,e2,e3) \ - _mm512_set4_pd((e3),(e2),(e1),(e0)) +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr4_pd(double e0, double e1, double e2, double e3) { + return _mm512_set4_pd(e3, e2, e1, e0); +} -#define _mm512_setr4_ps(e0,e1,e2,e3) \ - _mm512_set4_ps((e3),(e2),(e1),(e0)) +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr4_ps(float e0, float e1, float e2, float e3) { + return _mm512_set4_ps(e3, e2, e1, e0); +} -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_broadcastsd_pd(__m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcastsd_pd(__m128d __A) { return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -417,37 +407,36 @@ _mm512_castps256_ps512(__m256 __a) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -static __inline __m128d __DEFAULT_FN_ATTRS512 +static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd128(__m512d __a) { return __builtin_shufflevector(__a, __a, 0, 1); } -static __inline __m256d __DEFAULT_FN_ATTRS512 +static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd256 (__m512d __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); } -static __inline __m128 __DEFAULT_FN_ATTRS512 +static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } -static __inline __m256 __DEFAULT_FN_ATTRS512 -_mm512_castps512_ps256 (__m512 __A) -{ +static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_castps512_ps256(__m512 __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 
6, 7); } -static __inline __m512 __DEFAULT_FN_ATTRS512 +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ps (__m512d __A) { return (__m512) (__A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_si512 (__m512d __A) { return (__m512i) (__A); @@ -462,13 +451,13 @@ _mm512_castpd128_pd512 (__m128d __A) __B, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline __m512d __DEFAULT_FN_ATTRS512 +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_pd (__m512 __A) { return (__m512d) (__A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_si512 (__m512 __A) { return (__m512i) (__A); @@ -498,39 +487,36 @@ _mm512_castsi256_si512 (__m256i __A) return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline __m512 __DEFAULT_FN_ATTRS512 +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_ps (__m512i __A) { return (__m512) (__A); } -static __inline __m512d __DEFAULT_FN_ATTRS512 +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_pd (__m512i __A) { return (__m512d) (__A); } -static __inline __m128i __DEFAULT_FN_ATTRS512 +static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si128 (__m512i __A) { return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); } -static __inline __m256i __DEFAULT_FN_ATTRS512 -_mm512_castsi512_si256 (__m512i __A) -{ +static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_castsi512_si256(__m512i __A) { return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_int2mask(int __a) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_int2mask(int __a) { return (__mmask16)__a; } -static __inline__ int __DEFAULT_FN_ATTRS -_mm512_mask2int(__mmask16 __a) -{ +static __inline__ int 
__DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_mask2int(__mmask16 __a) { return (int)__a; } @@ -547,9 +533,8 @@ _mm512_mask2int(__mmask16 __a) /// A 128-bit vector of [2 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_zextpd128_pd512(__m128d __a) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_zextpd128_pd512(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); } @@ -566,9 +551,8 @@ _mm512_zextpd128_pd512(__m128d __a) /// A 256-bit vector of [4 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_zextpd256_pd512(__m256d __a) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_zextpd256_pd512(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); } @@ -584,9 +568,8 @@ _mm512_zextpd256_pd512(__m256d __a) /// A 128-bit vector of [4 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_zextps128_ps512(__m128 __a) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_zextps128_ps512(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); } @@ -602,9 +585,8 @@ _mm512_zextps128_ps512(__m128 __a) /// A 256-bit vector of [8 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. 
-static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_zextps256_ps512(__m256 __a) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_zextps256_ps512(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } @@ -620,9 +602,8 @@ _mm512_zextps256_ps512(__m256 __a) /// A 128-bit integer vector. /// \returns A 512-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 384 bits are set to zero. -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_zextsi128_si512(__m128i __a) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_zextsi128_si512(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); } @@ -638,22 +619,20 @@ _mm512_zextsi128_si512(__m128i __a) /// A 256-bit integer vector. /// \returns A 512-bit integer vector. The lower 256 bits contain the value of /// the parameter. The upper 256 bits are set to zero. 
-static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_zextsi256_si512(__m256i __a) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_zextsi256_si512(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); } /* Bitwise operators */ -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a & (__v16su)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, (__v16si) _mm512_and_epi32(__a, __b), (__v16si) __src); @@ -666,18 +645,16 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) -{ - return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, - (__v8di) _mm512_and_epi64(__a, __b), - (__v8di) __src); +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -687,13 +664,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512 (__m512i 
__A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i)(~(__v16su)__A & (__v16su)__B); @@ -714,7 +691,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); @@ -735,7 +712,7 @@ _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a | (__v16su)__b); @@ -755,7 +732,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); @@ -775,7 +752,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a ^ (__v16su)__b); @@ -795,7 +772,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); @@ -815,19 +792,19 @@ 
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); @@ -835,125 +812,107 @@ _mm512_xor_si512(__m512i __a, __m512i __b) /* Arithmetic */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_add_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a + (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_add_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a + (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_mul_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a * (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_mul_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a * (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_sub_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a - 
(__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_sub_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a - (__v16sf)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_add_epi64 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sub_epi64 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) -{ +static 
__inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_add_epi32 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sub_epi32 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i 
__B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); @@ -973,24 +932,21 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) (__v8df)_mm512_max_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_max_pd(__m512d __A, __m512d __B) -{ +static __inline__ __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)_mm512_setzero_pd()); @@ -1010,31 +966,30 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_max_ps(__m512 __A, __m512 __B) -{ +static __inline__ __m512 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) -{ 
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, @@ -1042,8 +997,9 @@ _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), @@ -1069,8 +1025,10 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) __W, @@ -1078,8 +1036,9 @@ _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 
-_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), @@ -1106,89 +1065,76 @@ _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__mmask8)(U), (int)(R))) static __inline __m512i -__DEFAULT_FN_ATTRS512 -_mm512_max_epi32(__m512i __A, __m512i __B) -{ + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epu32(__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epu32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 
(__v16si)_mm512_max_epu32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_max_epu32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epi64(__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_max_epu64(__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_max_epu64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epu64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_max_epu64(__A, __B), (__v8di)_mm512_setzero_si512()); @@ -1208,24 +1154,21 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) (__v8df)_mm512_min_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_min_pd(__m512d __A, __m512d __B) -{ +static __inline__ __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_min_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_min_pd(__A, __B), (__v8df)_mm512_setzero_pd()); @@ -1245,31 +1188,30 @@ _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_min_ps(__m512 __A, __m512 __B) -{ +static __inline__ __m512 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B) { return 
(__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_min_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_min_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, @@ -1277,8 +1219,9 @@ _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), @@ -1304,8 +1247,10 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) 
__A, (__v2df) __B, (__v2df) __W, @@ -1313,8 +1258,9 @@ _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), @@ -1341,166 +1287,144 @@ _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__mmask8)(U), (int)(R))) static __inline __m512i -__DEFAULT_FN_ATTRS512 -_mm512_min_epi32(__m512i __A, __m512i __B) -{ + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_min_epu32(__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epu32(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) -{ 
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epu32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_min_epu32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_min_epi64(__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_min_epu64(__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_min_epu64(__m512i __A, __m512i __B) { return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_min_epu64 (__m512i __W, 
__mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epu64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_min_epu64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mul_epi32(__m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epi32(__X, __Y), (__v8di)__W); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epi32(__X, __Y), (__v8di)_mm512_setzero_si512 ()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mul_epu32(__m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_epu32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); } -static __inline __m512i __DEFAULT_FN_ATTRS512 
-_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epu32(__X, __Y), (__v8di)__W); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_mul_epu32(__X, __Y), (__v8di)_mm512_setzero_si512 ()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mullo_epi32 (__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mullo_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A * (__v16su) __B); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_mullo_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_mullo_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mullox_epi64 (__m512i __A, __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mullox_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullox_epi64(__A, __B), @@ -1520,26 +1444,19 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_sqrt_pd(__m512d __A) -{ - return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) { + return (__m512d)__builtin_elementwise_sqrt((__v8df)__A); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) -{ - return (__m512d)__builtin_ia32_selectpd_512(__U, - (__v8df)_mm512_sqrt_pd(__A), +_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) { + return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) -{ - return (__m512d)__builtin_ia32_selectpd_512(__U, - (__v8df)_mm512_sqrt_pd(__A), +_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) { + return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)_mm512_setzero_pd()); } @@ -1556,26 +1473,19 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_sqrt_ps(__m512 __A) -{ - return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) { + return (__m512)__builtin_elementwise_sqrt((__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) -{ 
- return (__m512)__builtin_ia32_selectps_512(__U, - (__v16sf)_mm512_sqrt_ps(__A), +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { + return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) -{ - return (__m512)__builtin_ia32_selectps_512(__U, - (__v16sf)_mm512_sqrt_ps(__A), +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) { + return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)_mm512_setzero_ps()); } @@ -1866,58 +1776,52 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_abs_epi64(__m512i __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_abs_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_abs_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_abs_epi32(__m512i __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_abs((__v16si) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 
-_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_abs_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_abs_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_add_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_add_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } @@ -1940,14 +1844,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_add_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_add_sd(__A, __B); return 
__builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } @@ -1969,28 +1873,28 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_add_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_add_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_add_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_add_ps(__A, __B), @@ -2025,14 +1929,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_sub_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR 
+_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_sub_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } @@ -2054,14 +1958,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_sub_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_sub_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } @@ -2084,28 +1988,28 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_sub_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_sub_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_sub_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_sub_ps(__A, __B), @@ -2140,14 +2044,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_mul_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_mul_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } @@ -2169,14 +2073,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_mul_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_mul_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } @@ -2199,28 +2103,28 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static 
__inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_mul_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_mul_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_mul_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_mul_ps(__A, __B), @@ -2255,14 +2159,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_div_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) { __A = _mm_div_ss(__A, __B); return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } @@ -2285,14 +2189,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), 
(int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_div_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) { __A = _mm_div_sd(__A, __B); return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } @@ -2315,40 +2219,38 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_div_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a/(__v8df)__b); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_div_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_div_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a/(__v16sf)__b); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 
__U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), @@ -2530,125 +2432,104 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { -(__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, + (__v8df)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { + return (__m512d)__builtin_ia32_selectpd_512( + 
(__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), + (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, + -(__v8df)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d 
__C, __mmask8 __U) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - -(__v8df) __B, - (__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), + (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, + (__v8df)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - -(__v8df) __B, - -(__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d 
__DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), + (__v8df)_mm512_setzero_pd()); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, + -(__v8df)__C); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { + return (__m512d)__builtin_ia32_selectpd_512( + (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), + (__v8df)_mm512_setzero_pd()); } #define _mm512_fmadd_round_ps(A, B, C, R) \ @@ -2734,125 +2615,104 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) -(__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) - 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, + (__v16sf)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, 
(__v16sf)_mm512_fmadd_ps(__A, __B, __C), + (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, + -(__v16sf)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - -(__v16sf) __B, - (__v16sf) __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { + return 
(__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), + (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, + (__v16sf)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - -(__v16sf) __B, - -(__v16sf) __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), + (__v16sf)_mm512_setzero_ps()); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, + -(__v16sf)__C); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), + (__v16sf)_mm512_setzero_ps()); } #define _mm512_fmaddsub_round_pd(A, B, C, R) \ @@ -3099,33 +2959,12 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) -{ - return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ (__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 
__C, __mmask16 __U) -{ - return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ @@ -3166,34 +3005,12 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - -(__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) - -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - -(__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ -(__v8df)(__m512d)(B), \ @@ -3207,27 +3024,6 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) (__v8df)(__m512d)(C), \ (__mmask8)(U), (int)(R))) - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) -{ - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, - -(__v8df) __B, - -(__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) -{ - return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) 
__A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ -(__v16sf)(__m512)(B), \ @@ -3241,94 +3037,63 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R))) - -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) -{ - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, - -(__v16sf) __B, - -(__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) -{ - return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - - - /* Vector permutations */ -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, (__v16si) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), (__v16si)__I); } -static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, (__v8di) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), (__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), (__v8di)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, - __m512i __B) -{ + __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), (__v8di)_mm512_setzero_si512()); @@ -3363,10 +3128,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v16si)_mm512_setzero_si512())) /* Vector Extract */ -#define _mm512_extractf64x4_pd(A, I) \ - ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ - (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1)) +#define 
_mm512_extractf64x4_pd(A, I) \ + ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ @@ -3378,10 +3143,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) -#define _mm512_extractf32x4_ps(A, I) \ - ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1)) +#define _mm512_extractf32x4_ps(A, I) \ + ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) - 1)) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ @@ -3395,33 +3160,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, /* Vector Blend */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, (__v8df) __W, (__v8df) __A); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, (__v16sf) __W, (__v16sf) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, (__v8di) __W, (__v8di) __A); } 
-static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, (__v16si) __W, (__v16si) __A); @@ -3615,115 +3376,99 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R))) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_cvtepu32_ps (__m512i __A) -{ +static __inline__ __m512 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A) { return (__m512)__builtin_convertvector((__v16su)__A, __v16sf); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepu32_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepu32_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepi32_pd(__m256i __A) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepi32_pd(__A), 
(__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepi32_pd(__A), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepi32lo_pd(__m512i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi32lo_pd(__m512i __A) { return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_cvtepi32_ps (__m512i __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi32_ps(__m512i __A) { return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepi32_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_cvtepi32_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepu32_pd(__m256i __A) -{ +static __inline __m512d 
__DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepu32_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, (__v8df)_mm512_cvtepu32_pd(__A), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtepu32lo_pd(__m512i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu32lo_pd(__m512i __A) { return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) { return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); } @@ -3742,44 +3487,38 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m256 __DEFAULT_FN_ATTRS512 -_mm512_cvtpd_ps (__m512d __A) -{ - return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, - (__v8sf) _mm256_undefined_ps (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); +static __inline__ __m256 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) { + return (__m256)__builtin_ia32_cvtpd2ps512_mask( + (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, + 
_MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, (__v8sf) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_cvtpd_pslo (__m512d __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtpd_pslo(__m512d __A) { return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), (__v8sf) _mm256_setzero_ps (), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) { return (__m512) __builtin_shufflevector ( (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), __U, __A), @@ -4123,9 +3862,8 @@ _mm512_cvtss_f32(__m512 __a) /* Unpack and Interleave */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } @@ -4146,9 +3884,8 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_pd(__m512d 
__a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } @@ -4169,9 +3906,8 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, @@ -4195,9 +3931,8 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) (__v16sf)_mm512_setzero_ps()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, @@ -4221,9 +3956,8 @@ _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_epi32(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, @@ -4247,9 +3981,8 @@ _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_epi32(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, @@ -4273,9 +4006,8 @@ 
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_epi64(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } @@ -4296,9 +4028,8 @@ _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } @@ -4619,9 +4350,8 @@ _mm512_store_epi64 (void *__P, __m512i __A) /* Mask ops */ -static __inline __mmask16 __DEFAULT_FN_ATTRS -_mm512_knot(__mmask16 __M) -{ +static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_knot(__mmask16 __M) { return __builtin_ia32_knothi(__M); } @@ -4727,237 +4457,207 @@ _mm512_knot(__mmask16 __M) #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi8_epi32(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi8_epi32(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi8_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi8_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi8_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi8_epi64(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi8_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi8_epi64(__A), (__v8di)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi32_epi64(__m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi32_epi64(__X), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi32_epi64(__X), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi16_epi32(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi16_epi32(__m256i __A) { return 
(__m512i)__builtin_convertvector((__v16hi)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi16_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepi16_epi32(__A), (__v16si)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepi16_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepi16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi16_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepi16_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu8_epi32(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu8_epi32(__m128i __A) { return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); } 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu8_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu8_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu8_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu8_epi64(__m128i __A) { return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu8_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu8_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu32_epi64(__m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); } -static 
__inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu32_epi64(__X), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu32_epi64(__X), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu16_epi32(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu16_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_cvtepu16_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtepu16_epi64(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtepu16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 
-_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu16_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_cvtepu16_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4965,7 +4665,7 @@ _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4973,13 +4673,13 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B); } 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -4987,7 +4687,7 @@ _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5063,13 +4763,13 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_rol_epi64((a), (b)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -5077,7 +4777,7 @@ _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -5085,13 +4785,13 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); + return 
(__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5099,7 +4799,7 @@ _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5133,91 +4833,81 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_ror_epi64((A), (B)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_slli_epi32(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_slli_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_slli_epi64(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_slli_epi64(__m512i __A, unsigned int __B) { return 
(__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, + unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_slli_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_slli_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srli_epi32(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srli_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srli_epi64(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srli_epi64(__m512i __A, unsigned int __B) { return 
(__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srli_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, - unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srli_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); @@ -5303,7 +4993,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd (__m512d __A) { return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, @@ -5665,9 +5355,8 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kmov (__mmask16 __A) -{ +static __inline__ __mmask16 + __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A) { return __A; } @@ -5684,79 +5373,70 @@ _mm512_kmov (__mmask16 __A) ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sll_epi32(__m512i __A, __m128i __B) -{ +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sll_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sll_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sll_epi64(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sll_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sll_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sll_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sllv_epi32(__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sllv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) -{ +static 
__inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sllv_epi32(__X, __Y), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sllv_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5764,7 +5444,7 @@ _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5772,79 +5452,70 @@ _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sra_epi32(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sra_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i 
__DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sra_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sra_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_sra_epi64(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sra_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sra_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sra_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srav_epi32(__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srav_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i 
__X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srav_epi32(__X, __Y), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srav_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5852,7 +5523,7 @@ _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5860,79 +5531,70 @@ _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srl_epi32(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srl_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) -{ +static 
__inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srl_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srl_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srl_epi64(__m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srl_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srl_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srl_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srlv_epi32(__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srlv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srlv_epi32(__m512i __W, 
__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srlv_epi32(__X, __Y), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srlv_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5940,7 +5602,7 @@ _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -6190,115 +5852,104 @@ _mm_cvttss_u64 (__m128 __A) (__v16sf)_mm512_permute_ps((X), (C)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_permutevar_pd(__m512d __A, __m512i __C) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_permutevar_pd(__m512d __W, __mmask8 
__U, __m512d __A, __m512i __C) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_permutevar_ps(__m512 __A, __m512i __C) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) { return 
(__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, (__v8df)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, + __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_permutex2var_pd(__A, __I, __B), (__v8df)__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, - __m512d __B) -{ + __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_permutex2var_pd(__A, __I, __B), (__v8df)(__m512d)__I); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, - __m512d __B) -{ + __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_permutex2var_pd(__A, __I, __B), (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, (__v16sf) __B); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, + __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), (__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) -{ +static 
__inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, + __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), (__v16sf)(__m512)__I); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, + __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), (__v16sf)_mm512_setzero_ps()); } - #define _mm512_cvtt_roundpd_epu32(A, R) \ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_undefined_si256(), \ @@ -6622,46 +6273,41 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) (__mmask8)(U), \ (int)(R))) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srai_epi32(__m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srai_epi32(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, - unsigned int __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srai_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_srai_epi64(__m512i 
__A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_srai_epi64(__m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, + unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_srai_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); @@ -6827,33 +6473,29 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_broadcast_f32x4(__m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_f32x4(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { 
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_broadcast_f64x4(__m256d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_f64x4(__m256d __A) { return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 0, 1, 2, 3, 0, 1, 2, 3); } @@ -6874,33 +6516,29 @@ _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcast_i32x4(__m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_i32x4(__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_broadcast_i64x4(__m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_broadcast_i64x4(__m256i __A) { return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 0, 1, 2, 3, 0, 1, 2, 3); } @@ -6921,33 +6559,29 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 
-_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) __O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) _mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) __O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) _mm512_setzero_ps()); @@ -7391,10 +7025,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -#define _mm512_extracti32x4_epi32(A, imm) \ - ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm512_extracti32x4_epi32(A, imm) \ + ((__m128i)__builtin_ia32_extracti32x4_mask( \ + (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), 
(int)(imm), \ @@ -7406,10 +7040,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v4si)_mm_setzero_si128(), \ (__mmask8)(U))) -#define _mm512_extracti64x4_epi64(A, imm) \ +#define _mm512_extracti64x4_epi64(A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1)) + (__v4di)_mm256_setzero_si256(), \ + (__mmask8) - 1)) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ @@ -8274,93 +7908,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) (__v8di)_mm512_permutex_epi64((X), (C)), \ (__v8di)_mm512_setzero_si512())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_pd (__m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_pd(__m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, + __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutexvar_pd(__X, __Y), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, - __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_permutexvar_epi64(__X, __Y), (__v8di)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_ps (__m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_ps(__m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutexvar_ps(__X, __Y), (__v16sf)_mm512_setzero_ps()); } -static 
__inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); } #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_permutexvar_epi32(__X, __Y), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, - __m512i __Y) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, + __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_permutexvar_epi32(__X, __Y), (__v16si)__W); @@ -8368,69 +7991,59 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kand (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 + __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kandn (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kandn(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 
__DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ int __DEFAULT_FN_ATTRS -_mm512_kortestc (__mmask16 __A, __mmask16 __B) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kortestc(__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ int __DEFAULT_FN_ATTRS -_mm512_kortestz (__mmask16 __A, __mmask16 __B) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kortestz(__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_kortestchi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS -_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) -{ +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR +_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) { return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B); return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kunpackb (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackb(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kxnor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR 
+_mm512_kxnor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kxor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kxor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); } @@ -8447,12 +8060,12 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B) #define _kshiftri_mask16(A, I) \ ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))) -static __inline__ unsigned int __DEFAULT_FN_ATTRS -_cvtmask16_u32(__mmask16 __A) { +static __inline__ unsigned int + __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A) { return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A) { return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A); } @@ -8665,74 +8278,66 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) _mm512_setzero_si512()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return 
__builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } @@ -8941,70 +8546,57 @@ _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtps_pd (__m256 __A) -{ +static __inline__ __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A) { return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtps_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_cvtps_pd(__A), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_cvtpslo_pd (__m512 __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtpslo_pd(__m512 __A) { return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } -static 
__inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) -{ - return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, - (__v8df) __A, - (__v8df) __W); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A, + (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) -{ - return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, - (__v8df) __A, - (__v8df) _mm512_setzero_pd ()); +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A, + (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ - return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, - (__v16sf) __A, - (__v16sf) __W); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A, + (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) -{ - return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, - (__v16sf) __A, - (__v16sf) _mm512_setzero_ps ()); +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A, + (__v16sf)_mm512_setzero_ps()); } static __inline__ void __DEFAULT_FN_ATTRS512 @@ -9053,18 +8645,16 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, 
__m128 __A, __m128d __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, (__v2df)__B, (__v4sf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, (__v2df)__B, (__v4sf)_mm_setzero_ps(), @@ -9188,34 +8778,32 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B) } #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, (__v16si) _mm512_set1_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) { return (__m512i) __builtin_ia32_selectq_512(__M, (__v8di) _mm512_set1_epi64(__A), (__v8di) __O); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, - char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, - char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, - char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, - char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, - char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, - char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, - char __e22, char __e21, char __e20, char __e19, char 
__e18, char __e17, - char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, - char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, - char __e4, char __e3, char __e2, char __e1, char __e0) { +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8( + char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, + char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, + char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, + char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, + char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, + char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, + char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, + char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, + char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, + char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, + char __e2, char __e1, char __e0) { return __extension__ (__m512i)(__v64qi) {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, @@ -9228,14 +8816,13 @@ _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, - short __e27, short __e26, short __e25, short __e24, short __e23, - short __e22, short __e21, short __e20, short __e19, short __e18, - short __e17, short __e16, short __e15, short __e14, short __e13, - short __e12, short __e11, short __e10, short __e9, short __e8, - short __e7, short __e6, short __e5, short __e4, short __e3, - short __e2, short __e1, short __e0) { +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16( + short __e31, short __e30, short __e29, short __e28, short __e27, + short __e26, short __e25, short __e24, short 
__e23, short __e22, + short __e21, short __e20, short __e19, short __e18, short __e17, + short __e16, short __e15, short __e14, short __e13, short __e12, + short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, + short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) { return __extension__ (__m512i)(__v32hi) {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, @@ -9243,81 +8830,81 @@ _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_set_epi32 (int __A, int __B, int __C, int __D, - int __E, int __F, int __G, int __H, - int __I, int __J, int __K, int __L, - int __M, int __N, int __O, int __P) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32( + int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, + int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) { return __extension__ (__m512i)(__v16si) { __P, __O, __N, __M, __L, __K, __J, __I, __H, __G, __F, __E, __D, __C, __B, __A }; } -#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ - e8,e9,e10,e11,e12,e13,e14,e15) \ - _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ - (e5),(e4),(e3),(e2),(e1),(e0)) +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32( + int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, + int e9, int e10, int e11, int e12, int e13, int e14, int e15) { + return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, + e3, e2, e1, e0); +} -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_set_epi64 (long long __A, long long __B, long long __C, - long long __D, long long __E, long long __F, - long long __G, long long __H) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set_epi64(long long __A, long long __B, long 
long __C, long long __D, + long long __E, long long __F, long long __G, long long __H) { return __extension__ (__m512i) (__v8di) { __H, __G, __F, __E, __D, __C, __B, __A }; } -#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ - _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, + long long e4, long long e5, long long e6, long long e7) { + return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0); +} -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_set_pd (double __A, double __B, double __C, double __D, - double __E, double __F, double __G, double __H) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set_pd(double __A, double __B, double __C, double __D, double __E, + double __F, double __G, double __H) { return __extension__ (__m512d) { __H, __G, __F, __E, __D, __C, __B, __A }; } -#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ - _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, + double e6, double e7) { + return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0); +} -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_set_ps (float __A, float __B, float __C, float __D, - float __E, float __F, float __G, float __H, - float __I, float __J, float __K, float __L, - float __M, float __N, float __O, float __P) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, + float __G, float __H, float __I, float __J, float __K, float __L, + float __M, float __N, float __O, float __P) { return __extension__ (__m512) { __P, __O, __N, __M, __L, __K, __J, __I, __H, __G, __F, __E, __D, __C, __B, __A }; } -#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ - 
_mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ - (e4),(e3),(e2),(e1),(e0)) +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, + float e6, float e7, float e8, float e9, float e10, float e11, + float e12, float e13, float e14, float e15) { + return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, + e2, e1, e0); +} -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_abs_ps(__m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_ps(__m512 __A) { return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) { return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_abs_pd(__m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_pd(__m512d __A) { return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) { return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); } @@ -9337,19 +8924,23 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) * This takes log2(n) steps where n is the number of elements in the vector. 
*/ -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_add_epi64(__m512i __W) { return __builtin_reduce_add((__v8di)__W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_mul_epi64(__m512i __W) { return __builtin_reduce_mul((__v8di)__W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_and_epi64(__m512i __W) { return __builtin_reduce_and((__v8di)__W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_reduce_or_epi64(__m512i __W) { return __builtin_reduce_or((__v8di)__W); } @@ -9400,22 +8991,22 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { return __builtin_ia32_reduce_fmul_pd512(1.0, __W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W) { return __builtin_reduce_add((__v16si)__W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W) { return __builtin_reduce_mul((__v16si)__W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W) { return __builtin_reduce_and((__v16si)__W); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W) { return __builtin_reduce_or((__v16si)__W); } @@ -9466,22 +9057,22 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); } -static __inline__ long long __DEFAULT_FN_ATTRS512 +static 
__inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V) { return __builtin_reduce_max((__v8di)__V); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V) { return __builtin_reduce_max((__v8du)__V); } -static __inline__ long long __DEFAULT_FN_ATTRS512 +static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V) { return __builtin_reduce_min((__v8di)__V); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V) { return __builtin_reduce_min((__v8du)__V); } @@ -9509,22 +9100,22 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V); return __builtin_reduce_min((__v8du)__V); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V) { return __builtin_reduce_max((__v16si)__V); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS512 +static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V) { return __builtin_reduce_max((__v16su)__V); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V) { return __builtin_reduce_min((__v16si)__V); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS512 +static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V) { return __builtin_reduce_min((__v16su)__V); } diff --git a/lib/include/avx512fp16intrin.h b/lib/include/avx512fp16intrin.h index 92df320b45..9a1d1930f6 100644 --- a/lib/include/avx512fp16intrin.h +++ b/lib/include/avx512fp16intrin.h @@ -22,26 +22,36 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), 
__aligned__(1))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16,evex512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \ + __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16,no-evex512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16,no-evex512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \ __min_vector_width__(128))) -static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) { +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#endif + +static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_cvtsh_h(__m512h __a) { return __a[0]; } -static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) { +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_setzero_ph(void) { return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } -static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) { +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_setzero_ph(void) { return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } @@ -50,7 +60,8 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 
_mm256_undefined_ph(void) { return (__m256h)__builtin_ia32_undef256(); } -static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) { +static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setzero_ph(void) { return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; @@ -64,14 +75,15 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) { return (__m512h)__builtin_ia32_undef512(); } -static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) { +static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_set1_ph(_Float16 __h) { return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h}; } -static __inline __m512h __DEFAULT_FN_ATTRS512 +static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, @@ -87,106 +99,111 @@ _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, __h4, __h3, __h2, __h1}; } -#define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \ - h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24, \ - h25, h26, h27, h28, h29, h30, h31, h32) \ - _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \ - (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \ - (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6), \ - (h5), (h4), (h3), (h2), (h1)) +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ph( + _Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4, + _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9, + _Float16 e10, _Float16 e11, 
_Float16 e12, _Float16 e13, _Float16 e14, + _Float16 e15, _Float16 e16, _Float16 e17, _Float16 e18, _Float16 e19, + _Float16 e20, _Float16 e21, _Float16 e22, _Float16 e23, _Float16 e24, + _Float16 e25, _Float16 e26, _Float16 e27, _Float16 e28, _Float16 e29, + _Float16 e30, _Float16 e31) { + return _mm512_set_ph(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, + e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, + e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); +} -static __inline __m512h __DEFAULT_FN_ATTRS512 +static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pch(_Float16 _Complex __h) { return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h)); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castph_ps(__m128h __a) { return (__m128)__a; } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castph_ps(__m256h __a) { return (__m256)__a; } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) { +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph_ps(__m512h __a) { return (__m512)__a; } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castph_pd(__m128h __a) { return (__m128d)__a; } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castph_pd(__m256h __a) { return (__m256d)__a; } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) { +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph_pd(__m512h __a) { return (__m512d)__a; } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castph_si128(__m128h 
__a) { return (__m128i)__a; } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castph_si256(__m256h __a) { return (__m256i)__a; } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph_si512(__m512h __a) { return (__m512i)__a; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castps_ph(__m128 __a) { return (__m128h)__a; } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castps_ph(__m256 __a) { return (__m256h)__a; } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) { +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_ph(__m512 __a) { return (__m512h)__a; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castpd_ph(__m128d __a) { return (__m128h)__a; } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castpd_ph(__m256d __a) { return (__m256h)__a; } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) { +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ph(__m512d __a) { return (__m512h)__a; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castsi128_ph(__m128i __a) { return (__m128h)__a; } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castsi256_ph(__m256i __a) { return (__m256h)__a; } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR 
_mm512_castsi512_ph(__m512i __a) { return (__m512h)__a; } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castph256_ph128(__m256h __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m128h __DEFAULT_FN_ATTRS512 +static __inline__ __m128h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph512_ph128(__m512h __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m256h __DEFAULT_FN_ATTRS512 +static __inline__ __m256h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph512_ph256(__m512h __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -229,7 +246,7 @@ _mm512_castph256_ph512(__m256h __a) { /// A 128-bit vector of [8 x half]. /// \returns A 512-bit floating-point vector of [16 x half]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_zextph128_ph256(__m128h __a) { return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -248,7 +265,7 @@ _mm256_zextph128_ph256(__m128h __a) { /// A 128-bit vector of [8 x half]. /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextph128_ph512(__m128h __a) { return __builtin_shufflevector( __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, @@ -268,7 +285,7 @@ _mm512_zextph128_ph512(__m128h __a) { /// A 256-bit vector of [16 x half]. /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextph256_ph512(__m256h __a) { return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, @@ -482,19 +499,19 @@ _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) { (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A, - __m512h __B) { +static __inline__ __m512h + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), @@ -515,19 +532,19 @@ _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A, - __m512h __B) { +static __inline__ __m512h + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return 
(__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), @@ -548,7 +565,8 @@ _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) { +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_abs_ph(__m512h __A) { return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A); } @@ -570,23 +588,20 @@ _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_add_sh(__m128h __A, __m128h __B) { __A[0] += __B[0]; return __A; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_add_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_add_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } @@ -606,23 +621,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U, (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) -static 
__inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A, - __m128h __B) { +static __inline__ __m128h + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sub_sh(__m128h __A, __m128h __B) { __A[0] -= __B[0]; return __A; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_sub_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_sub_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } @@ -642,23 +654,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U, (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A, - __m128h __B) { +static __inline__ __m128h + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mul_sh(__m128h __A, __m128h __B) { __A[0] *= __B[0]; return __A; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_mul_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_mul_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } @@ -678,23 +687,20 @@ static 
__inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U, (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A, - __m128h __B) { +static __inline__ __m128h + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_div_sh(__m128h __A, __m128h __B) { __A[0] /= __B[0]; return __A; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_div_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, __W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) { __A = _mm_div_sh(__A, __B); return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); } @@ -942,22 +948,19 @@ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P, } // moves with vmovsh: -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a, - __m128h __b) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_move_sh(__m128h __a, __m128h __b) { __a[0] = __b[0]; return __a; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) { return __builtin_ia32_selectsh_128(__U, 
_mm_move_sh(__A, __B), _mm_setzero_ph()); } @@ -1383,24 +1386,20 @@ _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) { (__v32hf)_mm512_setzero_ph())) static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) { - return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A, - _MM_FROUND_CUR_DIRECTION); + return (__m512h)__builtin_elementwise_sqrt((__v32hf)__A); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) { return (__m512h)__builtin_ia32_selectph_512( - (__mmask32)(__U), - (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)), - (__v32hf)(__m512h)(__W)); + (__mmask32)(__U), (__v32hf)_mm512_sqrt_ph(__A), (__v32hf)(__m512h)(__W)); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) { - return (__m512h)__builtin_ia32_selectph_512( - (__mmask32)(__U), - (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)), - (__v32hf)_mm512_setzero_ph()); + return (__m512h)__builtin_ia32_selectph_512((__mmask32)(__U), + (__v32hf)_mm512_sqrt_ph(__A), + (__v32hf)_mm512_setzero_ph()); } #define _mm_sqrt_round_sh(A, B, R) \ @@ -3292,19 +3291,19 @@ _mm512_reduce_min_ph(__m512h __V) { return __builtin_ia32_reduce_fmin_ph512(__V); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W, (__v32hf)__A); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) { return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, (__v32hi)__B); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ph(__m512i __A, __m512h __B) { return 
(__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } @@ -3348,6 +3347,9 @@ _mm512_permutexvar_ph(__m512i __A, __m512h __B) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS512 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR #endif #endif diff --git a/lib/include/avx512ifmaintrin.h b/lib/include/avx512ifmaintrin.h index 9468d17556..f73b607df7 100644 --- a/lib/include/avx512ifmaintrin.h +++ b/lib/include/avx512ifmaintrin.h @@ -15,54 +15,52 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512ifma,evex512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ + __min_vector_width__(512))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ + __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), + 
(__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), - (__v8di)__W); +_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), - (__v8di)__W); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS diff --git 
a/lib/include/avx512ifmavlintrin.h b/lib/include/avx512ifmavlintrin.h index 8787cd471d..51d5210e5a 100644 --- a/lib/include/avx512ifmavlintrin.h +++ b/lib/include/avx512ifmavlintrin.h @@ -8,22 +8,35 @@ *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H -#error "Never use directly; include instead." +#error \ + "Never use directly; include instead." #endif #ifndef __IFMAVLINTRIN_H #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512ifma,avx512vl,no-evex512"), \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(256))) constexpr +#else +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512ifma,avx512vl,no-evex512"), \ + __target__("avx512ifma,avx512vl"), \ __min_vector_width__(256))) +#endif +#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__)) #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) @@ -39,71 +52,85 @@ #define _mm256_madd52lo_epu64(X, Y, Z) \ ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \ (__v4di)(Z))) +#endif +#if defined(__AVX512IFMA__) static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), - (__v2di)__W); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52hi_epu64 
(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y, + (__v2di)__Z); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), - (__v4di)__W); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, + (__v4di)__Z); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, + (__v2di)__Z); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, + (__v4di)__Z); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, 
__m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)__builtin_ia32_vpmadd52huq128(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)__builtin_ia32_vpmadd52huq256(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X, __Y), (__v2di)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)__builtin_ia32_vpmadd52luq128(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_mask_madd52lo_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X, __Y), (__v4di)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)__builtin_ia32_vpmadd52luq256(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); +} #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/lib/include/avx512vbmi2intrin.h b/lib/include/avx512vbmi2intrin.h index 11598c8887..a24b6e5921 100644 --- a/lib/include/avx512vbmi2intrin.h +++ b/lib/include/avx512vbmi2intrin.h @@ -15,8 +15,15 @@ #define __AVX512VBMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), \ + __min_vector_width__(512))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) @@ -212,14 +219,14 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ (__v32hi)_mm512_setzero_si512())) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, - (__v8di)__C); + return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B, + (__v8du)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static 
__inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -227,7 +234,7 @@ _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) (__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -235,14 +242,14 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B, + (__v16su)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -250,7 +257,7 @@ _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -258,14 +265,14 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, - (__v32hi)__C); + return 
(__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B, + (__v32hu)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -273,7 +280,7 @@ _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -281,14 +288,15 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, - (__v8di)__C); + // Ops __A and __B are swapped. 
+ return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A, + (__v8du)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -296,7 +304,7 @@ _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) (__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -304,14 +312,15 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + // Ops __A and __B are swapped. 
+ return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A, + (__v16su)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { return (__m512i) __builtin_ia32_selectd_512(__U, @@ -319,7 +328,7 @@ _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i) __builtin_ia32_selectd_512(__U, @@ -327,14 +336,15 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, - (__v32hi)__C); + // Ops __A and __B are swapped. 
+ return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A, + (__v32hu)__C); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -342,7 +352,7 @@ _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) { return (__m512i)__builtin_ia32_selectw_512(__U, @@ -352,6 +362,7 @@ _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/lib/include/avx512vbmiintrin.h b/lib/include/avx512vbmiintrin.h index e47cd5cadd..5ac78f0849 100644 --- a/lib/include/avx512vbmiintrin.h +++ b/lib/include/avx512vbmiintrin.h @@ -15,63 +15,57 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vbmi,evex512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \ + __min_vector_width__(512))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \ + __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) -{ +_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I, (__v64qi) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8( + __m512i __A, __mmask64 __U, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8( + __m512i __A, __m512i __I, __mmask64 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8( + __mmask64 __U, __m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512(__U, (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B), (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_permutexvar_epi8 
(__m512i __A, __m512i __B) -{ +_mm512_permutexvar_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A, - __m512i __B) -{ +_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8( + __m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_permutexvar_epi8(__A, __B), (__v64qi)__W); @@ -100,7 +94,5 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) (__v64qi)_mm512_setzero_si512()); } - #undef __DEFAULT_FN_ATTRS - #endif diff --git a/lib/include/avx512vbmivlintrin.h b/lib/include/avx512vbmivlintrin.h index 848ca2d18c..40a67bd63c 100644 --- a/lib/include/avx512vbmivlintrin.h +++ b/lib/include/avx512vbmivlintrin.h @@ -15,126 +15,114 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vbmi,avx512vl,no-evex512"), \ + __target__("avx512vbmi,avx512vl"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl"), \ + __min_vector_width__(256))) constexpr +#else +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vbmi,avx512vl"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vbmi,avx512vl,no-evex512"), \ + __target__("avx512vbmi,avx512vl"), \ __min_vector_width__(256))) +#endif static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) -{ +_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, (__v16qi)__I, (__v16qi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, - __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8( + __m128i __A, __mmask16 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, - __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8( + __m128i __A, __m128i __I, __mmask16 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, - __m128i __B) -{ +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8( + __mmask16 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128(__U, (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) -{ +_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, (__v32qi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8( + __m256i __A, __mmask32 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8( + __m256i __A, __m256i __I, __mmask32 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8( + __mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256(__U, (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_permutexvar_epi8 (__m128i __A, __m128i __B) -{ +_mm_permutexvar_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 
-_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) -{ +_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, - __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8( + __m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_permutexvar_epi8(__A, __B), (__v16qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) -{ +_mm256_permutexvar_epi8(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, - __m256i __B) -{ +_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8( + __m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_permutexvar_epi8(__A, __B), (__v32qi)__W); @@ -186,8 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) (__v32qi)_mm256_setzero_si256()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 - #endif diff --git a/lib/include/avx512vlbf16intrin.h b/lib/include/avx512vlbf16intrin.h index 89c9f49c7a..8543402065 100644 --- a/lib/include/avx512vlbf16intrin.h +++ 
b/lib/include/avx512vlbf16intrin.h @@ -17,13 +17,21 @@ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bf16,no-evex512"), \ + __target__("avx512vl,avx512bf16"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bf16,no-evex512"), \ + __target__("avx512vl,avx512bf16"), \ __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + /// Convert Two Packed Single Data to One Packed BF16 Data. /// /// \headerfile @@ -421,9 +429,10 @@ static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) { /// \param __A /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A -static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) { - return _mm_castsi128_ps( - (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16)); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtpbh_ps(__m128bh __A) { + return (__m128)_mm256_castps256_ps128( + (__m256) __builtin_convertvector(__A, __v8sf)); } /// Convert Packed BF16 Data to Packed float Data. @@ -433,9 +442,9 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) { /// \param __A /// A 128-bit vector of [8 x bfloat]. 
/// \returns A 256-bit vector of [8 x float] come from conversion of __A -static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) { - return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32( - (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16)); +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtpbh_ps(__m128bh __A) { + return (__m256) __builtin_convertvector(__A, __v8sf); } /// Convert Packed BF16 Data to Packed float Data using zeroing mask. @@ -448,10 +457,10 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) { /// \param __A /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { - return _mm_castsi128_ps((__m128i)_mm_slli_epi32( - (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16)); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_cvtpbh_ps(__A), (__v4sf)_mm_setzero_ps()); } /// Convert Packed BF16 Data to Packed float Data using zeroing mask. @@ -464,10 +473,11 @@ _mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \returns A 256-bit vector of [8 x float] come from conversion of __A -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { - return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32( - (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16)); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_cvtpbh_ps(__A), + (__v8sf)_mm256_setzero_ps()); } /// Convert Packed BF16 Data to Packed float Data using merging mask. @@ -483,11 +493,10 @@ _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { /// \param __A /// A 128-bit vector of [4 x bfloat]. 
/// \returns A 128-bit vector of [4 x float] come from conversion of __A -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) { - return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32( - (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A), - 16)); + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_cvtpbh_ps(__A), (__v4sf)__S); } /// Convert Packed BF16 Data to Packed float Data using merging mask. @@ -503,15 +512,16 @@ _mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) { /// \param __A /// A 128-bit vector of [8 x bfloat]. /// \returns A 256-bit vector of [8 x float] come from conversion of __A -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) { - return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32( - (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), - 16)); + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_cvtpbh_ps(__A), (__v8sf)__S); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif #endif diff --git a/lib/include/avx512vlbitalgintrin.h b/lib/include/avx512vlbitalgintrin.h index 1b01fe0b9d..edfb9c1e1f 100644 --- a/lib/include/avx512vlbitalgintrin.h +++ b/lib/include/avx512vlbitalgintrin.h @@ -15,101 +15,86 @@ #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bitalg,no-evex512"), \ + __target__("avx512vl,avx512bitalg"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg"), \ + __min_vector_width__(256))) constexpr +#else +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bitalg,no-evex512"), \ + __target__("avx512vl,avx512bitalg"), \ __min_vector_width__(256))) +#endif static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_popcnt_epi16(__m256i __A) -{ +_mm256_popcnt_epi16(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) -{ - return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, - (__v16hi) _mm256_popcnt_epi16(__B), - (__v16hi) __A); +_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) -{ - return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), - __U, - __B); +_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { + return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_popcnt_epi16(__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); } static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 -_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) -{ - return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, - (__v8hi) _mm_popcnt_epi16(__B), - (__v8hi) __A); +_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) -{ - return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), - __U, - __B); +_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { + return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_popcnt_epi8(__m256i __A) -{ +_mm256_popcnt_epi8(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) -{ - return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, - (__v32qi) _mm256_popcnt_epi8(__B), - (__v32qi) __A); +_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) -{ - return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), - __U, - __B); +_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { + return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_popcnt_epi8(__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) -{ - return (__m128i) 
__builtin_ia32_selectb_128((__mmask16) __U, - (__v16qi) _mm_popcnt_epi8(__B), - (__v16qi) __A); +_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) -{ - return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), - __U, - __B); +_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { + return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 @@ -147,5 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 - #endif diff --git a/lib/include/avx512vlbwintrin.h b/lib/include/avx512vlbwintrin.h index 9aedba0669..fb5d9d4dcc 100644 --- a/lib/include/avx512vlbwintrin.h +++ b/lib/include/avx512vlbwintrin.h @@ -17,12 +17,18 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bw,no-evex512"), \ - __min_vector_width__(128))) + __target__("avx512vl,avx512bw"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bw,no-evex512"), \ - __min_vector_width__(256))) + __target__("avx512vl,avx512bw"), __min_vector_width__(256))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif /* Integer compare */ @@ -306,250 +312,238 @@ #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), 
(__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __W, (__v16qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __W, (__v32qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __W, 
(__v8hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __W, (__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_abs_epi8(__A), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_abs_epi8(__A), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_abs_epi8(__A), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_abs_epi8(__A), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return 
(__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_abs_epi16(__A), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_abs_epi16(__A), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_abs_epi16(__A), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_abs_epi16(__A), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -557,7 +551,7 @@ _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 
@@ -565,7 +559,7 @@ _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -573,7 +567,7 @@ _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -581,7 +575,7 @@ _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -589,7 +583,7 @@ _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -597,7 +591,7 @@ _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -605,7 +599,7 @@ _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -613,7 +607,7 @@ _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -621,7 +615,7 @@ _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -629,7 +623,7 @@ _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -637,7 +631,7 @@ _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -645,7 +639,7 @@ _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -653,7 +647,7 @@ _mm_mask_packus_epi16(__m128i __W, 
__mmask16 __M, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -661,7 +655,7 @@ _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -669,7 +663,7 @@ _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -677,7 +671,7 @@ _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -685,7 +679,7 @@ _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -693,7 +687,7 @@ _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, 
__m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -701,7 +695,7 @@ _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -709,7 +703,7 @@ _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -717,7 +711,7 @@ _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -725,7 +719,7 @@ _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -733,7 +727,7 @@ _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -741,7 +735,7 @@ _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -749,7 +743,7 @@ _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -757,7 +751,7 @@ _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -765,7 +759,7 @@ _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -773,7 +767,7 @@ _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -781,7 +775,7 @@ _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -789,7 +783,7 @@ 
_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -797,359 +791,311 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, - (__v16qi)_mm_avg_epu8(__A, __B), - (__v16qi)__W); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, - (__v32qi)_mm256_avg_epu8(__A, __B), - (__v32qi)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
+_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm_avg_epu16(__A, __B), - (__v8hi)__W); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_avg_epu16(__A, __B), - (__v16hi)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_avg_epu16(__A, __B), - (__v16hi)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, 
(__v16hi)_mm256_avg_epu16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_max_epi16(__m128i __W, 
__mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_max_epu8(__A, __B), (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, 
__m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_max_epu8(__A, __B), (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_max_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_max_epu16(__A, __B), (__v16hi)__W); 
} -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm_min_epu8(__A, __B), (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 
(__v32qi)_mm256_min_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, (__v32qi)_mm256_min_epu8(__A, __B), (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_min_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_min_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 
-_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1157,7 +1103,7 @@ _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) 
{ return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1165,7 +1111,7 @@ _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1173,7 +1119,7 @@ _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1181,7 +1127,7 @@ _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1189,7 +1135,7 @@ _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1197,7 +1143,7 @@ _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1205,7 +1151,7 @@ _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1213,7 +1159,7 @@ _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1221,7 +1167,7 @@ _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1229,7 +1175,7 @@ _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1237,7 +1183,7 @@ _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1245,7 +1191,7 @@ _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1253,7 +1199,7 @@ 
_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1261,7 +1207,7 @@ _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1269,7 +1215,7 @@ _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1277,89 +1223,81 @@ _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, - __m128i __B) -{ + __m128i __B) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, - __m128i __B) -{ + __m128i 
__B) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), (__v8hi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, - __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, + __m128i __B) { return (__m128i)__builtin_ia32_selectw_128(__U, (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, - __m256i __B) -{ + __m256i __B) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, - __m256i __B) -{ + __m256i __B) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), (__v16hi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, + __m256i __B) { return (__m256i)__builtin_ia32_selectw_256(__U, (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1367,35 +1305,35 @@ _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_maddubs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, 
__m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), @@ -1486,8 +1424,8 @@ _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi16_epi8 (__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi16_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v8hi)__A, __v8qi), (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, @@ -1527,20 +1465,20 @@ _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi8 (__m256i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi8(__m256i __A) { return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)_mm_setzero_si128()); @@ -1564,203 +1502,198 @@ 
_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm_mulhi_epu16(__A, __B), - (__v8hi)__W); + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_mulhi_epu16(__A, __B), - (__v16hi)__W); + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_mulhi_epu16(__A, __B), - (__v16hi)_mm256_setzero_si256()); + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm_mulhi_epi16(__A, __B), - (__v8hi)__W); + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_mulhi_epi16(__A, __B), - 
(__v16hi)__W); + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_mulhi_epi16(__A, __B), - (__v16hi)_mm256_setzero_si256()); + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, - (__v16qi)_mm_unpackhi_epi8(__A, __B), - (__v16qi)__W); + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static 
__inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1768,7 +1701,7 @@ _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) (__v8hi)__W); } -static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1776,7 +1709,7 @@ _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1784,7 +1717,7 @@ _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1793,7 +1726,7 @@ _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1801,7 +1734,7 @@ _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1809,7 +1742,7 @@ _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1817,7 +1750,7 @@ _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) (__v16hi)__W); } -static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1868,13 +1801,13 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) (imm)), \ (__v16hi)_mm256_setzero_si256())) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1882,7 +1815,7 @@ _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1890,13 +1823,13 @@ _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1904,7 +1837,7 @@ _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, 
__m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1944,7 +1877,7 @@ _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1952,7 +1885,7 @@ _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1960,30 +1893,28 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_slli_epi16(__A, (int)__B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_slli_epi16(__A, (int)__B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1991,7 +1922,7 @@ _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1999,13 +1930,13 @@ _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2013,7 +1944,7 @@ _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2021,13 +1952,13 @@ _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2035,7 +1966,7 @@ _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2043,13 +1974,13 @@ _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2057,7 +1988,7 @@ _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2097,34 +2028,30 @@ _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srai_epi16(__A, (int)__B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srai_epi16(__A, (int)__B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, - unsigned int __B) -{ + unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srai_epi16(__A, (int)__B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srai_epi16(__A, (int)__B), (__v16hi)_mm256_setzero_si256()); @@ -2162,104 +2089,91 @@ _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srli_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_srli_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
+_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srli_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_srli_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, (__v8hi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mov_epi16(__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, (__v8hi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __A, (__v16hi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, (__v16hi) __A, (__v16hi) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) -{ +static 
__inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, (__v16qi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mov_epi8(__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, (__v16qi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __A, (__v32qi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, (__v32qi) __A, (__v32qi) _mm256_setzero_si256 ()); } - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2267,7 +2181,7 @@ _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_set1_epi8 (__mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2275,7 +2189,7 @@ _mm_maskz_set1_epi8 (__mmask16 __M, char __A) (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) { return 
(__m256i) __builtin_ia32_selectb_256(__M, @@ -2283,7 +2197,7 @@ _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2463,22 +2377,19 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) (__mmask32) __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 -_mm_test_epi8_mask (__m128i __A, __m128i __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_test_epi8_mask(__m128i __A, __m128i __B) { return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 -_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 -_mm256_test_epi8_mask (__m256i __A, __m256i __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_test_epi8_mask(__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } @@ -2517,9 +2428,8 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 -_mm_testn_epi8_mask (__m128i __A, __m128i __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_testn_epi8_mask(__m128i __A, __m128i __B) { return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } @@ -2570,55 +2480,47 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 -_mm_movepi8_mask (__m128i 
__A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi8_mask(__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 -_mm256_movepi8_mask (__m256i __A) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi8_mask(__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi16_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi16_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 -_mm256_movepi16_mask (__m256i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi16_mask(__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_movm_epi8 (__mmask16 __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movm_epi8(__mmask16 __A) { return (__m128i) __builtin_ia32_cvtmask2b128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_movm_epi8 (__mmask32 __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movm_epi8(__mmask32 __A) { return (__m256i) __builtin_ia32_cvtmask2b256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_movm_epi16 (__mmask8 __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movm_epi16(__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2w128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_movm_epi16 (__mmask16 __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movm_epi16(__mmask16 __A) { return (__m256i) __builtin_ia32_cvtmask2w256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastb_epi8 (__m128i __O, 
__mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2626,7 +2528,7 @@ _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2634,7 +2536,7 @@ _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2642,7 +2544,7 @@ _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2650,7 +2552,7 @@ _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2658,7 +2560,7 @@ _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2666,7 +2568,7 @@ _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, 
__m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2674,7 +2576,7 @@ _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2682,7 +2584,7 @@ _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256 (__M, @@ -2690,7 +2592,7 @@ _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256(__M, @@ -2698,7 +2600,7 @@ _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2706,7 +2608,7 @@ _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_set1_epi16 (__mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2714,48 +2616,41 @@ _mm_maskz_set1_epi16 (__mmask8 __M, short __A) (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_permutexvar_epi16 (__m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_permutexvar_epi16(__m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_permutexvar_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, - __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_permutexvar_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutexvar_epi16 (__m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutexvar_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_permutexvar_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, - __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, (__v16hi)_mm256_permutexvar_epi16(__A, __B), (__v16hi)__W); @@ -2809,353 +2704,353 @@ 
_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ (__v16hi)_mm256_setzero_si256())) -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi16(__m128i __W) { return __builtin_reduce_add((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi16(__m128i __W) { return __builtin_reduce_mul((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi16(__m128i __W) { return __builtin_reduce_and((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi16(__m128i __W) { return __builtin_reduce_or((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) { +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W) { __W = _mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) { +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 -_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) { +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W) { __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) { __W = 
_mm_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v8hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi16(__m128i __V) { return __builtin_reduce_max((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu16(__m128i __V) { return __builtin_reduce_max((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi16(__m128i __V) { return __builtin_reduce_min((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu16(__m128i __V) { return __builtin_reduce_min((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS128 +static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v8hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS128 +static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v8hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi16(__m256i __W) { return __builtin_reduce_add((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi16(__m256i __W) { return __builtin_reduce_mul((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi16(__m256i __W) { return __builtin_reduce_and((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi16(__m256i __W) { return __builtin_reduce_or((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 -_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) { +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_add((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 -_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) { +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W); return __builtin_reduce_mul((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 -_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) { +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W); return __builtin_reduce_and((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) { __W = _mm256_maskz_mov_epi16(__M, __W); return __builtin_reduce_or((__v16hi)__W); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi16(__m256i __V) { return __builtin_reduce_max((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu16(__m256i __V) { return __builtin_reduce_max((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi16(__m256i __V) { return __builtin_reduce_min((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu16(__m256i __V) { return __builtin_reduce_min((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V); return __builtin_reduce_max((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_maskz_mov_epi16(__M, __V); return __builtin_reduce_max((__v16hu)__V); } -static __inline__ short __DEFAULT_FN_ATTRS256 +static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V); return __builtin_reduce_min((__v16hi)__V); } -static __inline__ unsigned short __DEFAULT_FN_ATTRS256 +static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) { __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V); return __builtin_reduce_min((__v16hu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi8(__m128i __W) { return 
__builtin_reduce_add((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi8(__m128i __W) { return __builtin_reduce_mul((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi8(__m128i __W) { return __builtin_reduce_and((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi8(__m128i __W) { return __builtin_reduce_or((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) { __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) { __W = _mm_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v16qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi8(__m128i __V) { return __builtin_reduce_max((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR 
_mm_reduce_max_epu8(__m128i __V) { return __builtin_reduce_max((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi8(__m128i __V) { return __builtin_reduce_min((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu8(__m128i __V) { return __builtin_reduce_min((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) { __V = _mm_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS128 +static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v16qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS128 +static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) { __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v16qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi8(__m256i __W) { return __builtin_reduce_add((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi8(__m256i __W) { return __builtin_reduce_mul((__v32qs)__W); } -static __inline__ signed char 
__DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi8(__m256i __W) { return __builtin_reduce_and((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi8(__m256i __W) { return __builtin_reduce_or((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_add((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W); return __builtin_reduce_mul((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W); return __builtin_reduce_and((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) { __W = _mm256_maskz_mov_epi8(__M, __W); return __builtin_reduce_or((__v32qs)__W); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi8(__m256i __V) { return __builtin_reduce_max((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu8(__m256i __V) { return __builtin_reduce_max((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi8(__m256i __V) { return 
__builtin_reduce_min((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu8(__m256i __V) { return __builtin_reduce_min((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V); return __builtin_reduce_max((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) { __V = _mm256_maskz_mov_epi8(__M, __V); return __builtin_reduce_max((__v32qu)__V); } -static __inline__ signed char __DEFAULT_FN_ATTRS256 +static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V); return __builtin_reduce_min((__v32qs)__V); } -static __inline__ unsigned char __DEFAULT_FN_ATTRS256 +static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) { __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V); return __builtin_reduce_min((__v32qu)__V); @@ -3163,5 +3058,7 @@ _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __AVX512VLBWINTRIN_H */ diff --git a/lib/include/avx512vlcdintrin.h b/lib/include/avx512vlcdintrin.h index 923e2c551a..df66e1df3b 100644 --- a/lib/include/avx512vlcdintrin.h +++ b/lib/include/avx512vlcdintrin.h @@ -14,211 +14,183 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512cd,no-evex512"), \ - __min_vector_width__(128))) + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(128))) constexpr #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512cd,no-evex512"), \ - __min_vector_width__(256))) + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(256))) constexpr +#else +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), __min_vector_width__(256))) +#endif static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastmb_epi64 (__mmask8 __A) -{ +_mm_broadcastmb_epi64(__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m256i) _mm256_set1_epi64x((long long)__A); +_mm256_broadcastmb_epi64(__mmask8 __A) { + return (__m256i)_mm256_set1_epi64x((long long)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastmw_epi32 (__mmask16 __A) -{ +_mm_broadcastmw_epi32(__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastmw_epi32 (__mmask16 __A) -{ +_mm256_broadcastmw_epi32(__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); +_mm_conflict_epi64(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) -{ - 
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_conflict_epi64(__A), - (__v2di)__W); +_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi64 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); +_mm256_conflict_epi64(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_conflict_epi64(__A), - (__v4di)__W); +_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi32 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); +_mm_conflict_epi32(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - 
(__v4si)_mm_conflict_epi32(__A), - (__v4si)__W); +_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi32 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); +_mm256_conflict_epi32(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_conflict_epi32(__A), - (__v8si)__W); +_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_lzcnt_epi32 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) { + return (__m128i)__builtin_elementwise_clzg((__v4si)__A, + (__v4si)_mm_set1_epi32(32)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - 
(__v4si)_mm_lzcnt_epi32(__A), - (__v4si)__W); +_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)_mm_setzero_si128()); +_mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_lzcnt_epi32 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); +_mm256_lzcnt_epi32(__m256i __A) { + return (__m256i)__builtin_elementwise_clzg((__v8si)__A, + (__v8si)_mm256_set1_epi32(32)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_lzcnt_epi32(__A), - (__v8si)__W); +_mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_lzcnt_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) { + return (__m128i)__builtin_elementwise_clzg( + (__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_lzcnt_epi64 
(__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)__W); +_mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)_mm_setzero_si128()); +_mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_lzcnt_epi64 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); +_mm256_lzcnt_epi64(__m256i __A) { + return (__m256i)__builtin_elementwise_clzg( + (__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_lzcnt_epi64(__A), - (__v4di)__W); +_mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)_mm256_setzero_si256()); diff --git a/lib/include/avx512vldqintrin.h b/lib/include/avx512vldqintrin.h index 272cdd89e2..cd1effdec2 100644 --- a/lib/include/avx512vldqintrin.h +++ b/lib/include/avx512vldqintrin.h @@ -17,45 +17,51 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512dq,no-evex512"), \ - __min_vector_width__(128))) + __target__("avx512vl,avx512dq"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512dq,no-evex512"), \ - __min_vector_width__(256))) + __target__("avx512vl,avx512dq"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mullo_epi64 (__m256i __A, __m256i __B) { +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#endif + +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mullo_epi64(__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mullo_epi64 (__m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mullo_epi64(__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), @@ -454,39 +460,39 @@ _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_pd (__m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_pd(__m128i __A) { return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_pd (__m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_pd(__m256i __A) { return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return 
(__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); @@ -513,20 +519,20 @@ _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_ps (__m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_ps(__m256i __A) { return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)_mm_setzero_ps()); @@ -700,39 +706,39 @@ _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_cvtepu64_pd (__m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu64_pd(__m128i __A) { return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepu64_pd(__m128d __W, 
__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_cvtepu64_pd (__m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu64_pd(__m256i __A) { return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); @@ -759,20 +765,20 @@ _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_cvtepu64_ps (__m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu64_ps(__m256i __A) { return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)_mm_setzero_ps()); @@ -908,174 +914,150 @@ _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi32_mask (__m128i __A) -{ +static __inline__ __mmask8 + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_movepi32_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 -_mm256_movepi32_mask (__m256i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi32_mask(__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_movm_epi32 (__mmask8 __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movm_epi32(__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2d128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_movm_epi32 (__mmask8 __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movm_epi32(__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2d256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_movm_epi64 (__mmask8 __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movm_epi64(__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2q128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_movm_epi64 (__mmask8 __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movm_epi64(__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } -static __inline__ 
__mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi64_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi64_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 -_mm256_movepi64_mask (__m256i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi64_mask(__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_broadcast_f32x2 (__m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcast_f32x2(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_broadcast_f64x2(__m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcast_f64x2(__m128d __A) { return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return 
(__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcast_i32x2 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcast_i32x2(__m128i __A) { return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcast_i32x2 (__m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcast_i32x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) { return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcast_i64x2(__m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcast_i64x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); } -#define _mm256_extractf64x2_pd(A, imm) \ - ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1)) +#define _mm256_extractf64x2_pd(A, imm) \ + ((__m128d)__builtin_ia32_extractf64x2_256_mask( \ + (__v4df)(__m256d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \ + (__mmask8) - 1)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ @@ -1089,11 +1071,10 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, 
__m128i __A) (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) -#define _mm256_extracti64x2_epi64(A, imm) \ - ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm256_extracti64x2_epi64(A, imm) \ + ((__m128i)__builtin_ia32_extracti64x2_256_mask( \ + (__v4di)(__m256i)(A), (int)(imm), (__v2di)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ @@ -1169,5 +1150,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif diff --git a/lib/include/avx512vlfp16intrin.h b/lib/include/avx512vlfp16intrin.h index a12acb7d9a..4f9c7cb79e 100644 --- a/lib/include/avx512vlfp16intrin.h +++ b/lib/include/avx512vlfp16intrin.h @@ -19,51 +19,64 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16,avx512vl,no-evex512"), \ + __target__("avx512fp16,avx512vl"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512fp16,avx512vl,no-evex512"), \ + __target__("avx512fp16,avx512vl"), \ __min_vector_width__(128))) -static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) { +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#endif + +static __inline__ _Float16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtsh_h(__m128h __a) { return __a[0]; } -static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) { +static __inline__ _Float16 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtsh_h(__m256h __a) { return __a[0]; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_set_sh(_Float16 __h) { return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0}; } -static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) { +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_set1_ph(_Float16 __h) { return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h}; } -static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) { +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_set1_ph(_Float16 __h) { return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h}; } -static __inline __m128h __DEFAULT_FN_ATTRS128 +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, 
_Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } -static __inline __m256h __DEFAULT_FN_ATTRS256 +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_set1_pch(_Float16 _Complex h) { return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h)); } -static __inline __m128h __DEFAULT_FN_ATTRS128 +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_set1_pch(_Float16 _Complex h) { return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h)); } -static __inline __m256h __DEFAULT_FN_ATTRS256 +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, @@ -73,13 +86,20 @@ _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, __h4, __h3, __h2, __h1}; } -#define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8) \ - _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1)) +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4, + _Float16 e5, _Float16 e6, _Float16 e7) { + return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0); +} -#define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \ - h14, h15, h16) \ - _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8), \ - (h7), (h6), (h5), (h4), (h3), (h2), (h1)) +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4, + _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9, + _Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13, + _Float16 e14, _Float16 e15) { + return _mm256_set_ph(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, + e2, e1, e0); +} static __inline__ __m256h 
__DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A, __m256h __B) { @@ -229,12 +249,12 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, - __m256h __B) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -242,7 +262,7 @@ _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)__W); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -250,34 +270,31 @@ _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)_mm256_setzero_ph()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_min_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR 
+_mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, - __m256h __B) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -285,7 +302,7 @@ _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)__W); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -293,33 +310,32 @@ _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)_mm256_setzero_ph()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_max_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_ph(__m256h __A) { return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_abs_ph(__m128h __A) { return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); } @@ -601,7 +617,7 @@ _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) { (__mmask16)(U))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) { - return __builtin_ia32_sqrtph((__v8hf)__a); + return __builtin_elementwise_sqrt(__a); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, @@ -618,7 +634,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) { - return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a); + return __builtin_elementwise_sqrt(__a); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 @@ -790,34 +806,35 @@ _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) { (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( 
(__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), @@ -894,34 +911,35 @@ _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) { (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), 
(__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), @@ -1015,18 +1033,18 @@ _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) { (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_ph(__m256i __A) { return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf); } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph()); @@ -1049,18 +1067,18 @@ _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) { (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_ph(__m256i __A) { return (__m128h) 
__builtin_convertvector((__v8su)__A, __v8hf); } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS256 +static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph()); @@ -1419,8 +1437,8 @@ _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, - (__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, + (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, @@ -1429,7 +1447,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } @@ -1437,7 +1455,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1445,15 +1463,15 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + 
__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, - -(__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, + -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, @@ -1476,7 +1494,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } @@ -1484,7 +1502,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } @@ -1492,22 +1510,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, - (__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, + (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return 
(__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } @@ -1515,7 +1533,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1523,22 +1541,22 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, - -(__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, + -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1546,7 +1564,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ 
-1554,7 +1572,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } @@ -1562,7 +1580,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1570,7 +1588,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } @@ -1684,7 +1702,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } @@ -1692,7 +1710,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } @@ -1715,45 +1733,45 @@ _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { static __inline__ 
__m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, - (__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, + (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, - (__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, + (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C) { - return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, - -(__v8hf)__C); + return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, + -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } @@ -1761,22 +1779,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return 
(__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, - __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), + __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C) { - return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, - -(__v16hf)__C); + return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, + -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } @@ -1784,7 +1802,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, - __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), + __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } @@ -1974,37 +1992,36 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { (__v8sf)__C, (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, - __m128h __A, - __m128h __W) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W, (__v8hf)__A); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W, (__v16hf)__A); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static 
__inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) { return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi)__B); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) { return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutexvar_ph(__m128i __A, __m128h __B) { return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_ph(__m256i __A, __m256h __B) { return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } @@ -2066,6 +2083,8 @@ _mm_reduce_min_ph(__m128h __V) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif #endif diff --git a/lib/include/avx512vlintrin.h b/lib/include/avx512vlintrin.h index 2a5f7b43f6..ea43046240 100644 --- a/lib/include/avx512vlintrin.h +++ b/lib/include/avx512vlintrin.h @@ -15,14 +15,20 @@ #define __AVX512VLINTRIN_H #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,no-evex512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,no-evex512"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \ __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 
constexpr +#else +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#endif + typedef short __v2hi __attribute__((__vector_size__(4))); typedef char __v4qi __attribute__((__vector_size__(4))); typedef char __v2qi __attribute__((__vector_size__(2))); @@ -229,209 +235,183 @@ typedef char __v2qi __attribute__((__vector_size__(2))); #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_add_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_add_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_add_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_add_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); 
} -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sub_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sub_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sub_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sub_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_add_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_add_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_add_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_add_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sub_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sub_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 
(__v2di)_mm_sub_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sub_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epi32(__X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epi32(__X, __Y), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epi32(__X, __Y), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epi32(__X, __Y), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i 
__Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epu32(__X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_mul_epu32(__X, __Y), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epu32(__X, __Y), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_mul_epu32(__X, __Y), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_mullo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_mullo_epi32(__A, __B), (__v8si)__W); @@ -453,9 +433,8 @@ _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_and_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_and_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a & (__v8su)__b); } @@ -473,9 +452,8 @@ _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_and_epi32(__m128i __a, __m128i __b) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_and_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a & (__v4su)__b); } @@ -896,329 +874,312 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)(m))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __A); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__A); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, 
(__v2df)_mm_fmadd_pd(__A, __B, __C), + (__v2df)_mm_setzero_pd()); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__A); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), + (__v2df)_mm_setzero_pd()); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), + (__v2df)_mm_setzero_pd()); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d 
__C, __mmask8 __U) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), + (__v2df)_mm_setzero_pd()); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__A); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), + (__v4df)_mm256_setzero_pd()); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__A); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), + 
(__v4df)_mm256_setzero_pd()); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), + (__v4df)_mm256_setzero_pd()); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), + (__v4df)_mm256_setzero_pd()); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 
__C, __mmask8 __U) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), + (__v4sf)_mm_setzero_ps()); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), + (__v4sf)_mm_setzero_ps()); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), + (__v4sf)_mm_setzero_ps()); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR 
+_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), + (__v4sf)_mm_setzero_ps()); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), + (__v8sf)_mm256_setzero_ps()); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C); +} + 
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), + (__v8sf)_mm256_setzero_ps()); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), + (__v8sf)_mm256_setzero_ps()); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) -{ - return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __C); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df) __A); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df) __C); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd (-(__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df)_mm_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, 
__m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __A); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __C); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df) __A); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df) __C); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ 
__m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 (-(__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df)_mm256_setzero_pd()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __A); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __C); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __A); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf) __C); -} - -static 
__inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps (-(__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf)_mm_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __A); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __C); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __A); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - 
(__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf) __C); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 (-(__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf)_mm256_setzero_ps()); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) -{ +_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, __builtin_ia32_vfmaddsubpd ((__v2df) __A, (__v2df) __B, @@ -1417,46 +1378,6 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - (__v2df) __B, - -(__v2df) __C), - (__v2df) __C); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - (__v4df) __B, - -(__v4df) __C), - (__v4df) __C); -} - -static __inline__ __m128 
__DEFAULT_FN_ATTRS128 -_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __C); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __C); -} - static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { @@ -1497,233 +1418,113 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - (__v2df) __C), - (__v2df) __A); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - (__v4df) __C), - (__v4df) __A); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - (__v4sf) __C), - (__v4sf) __A); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - (__v8sf) __C), - (__v8sf) __A); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 
-_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - -(__v2df) __C), - (__v2df) __A); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) -{ - return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, - __builtin_ia32_vfmaddpd ((__v2df) __A, - -(__v2df) __B, - -(__v2df) __C), - (__v2df) __C); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - -(__v4df) __C), - (__v4df) __A); -} - -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) -{ - return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, - __builtin_ia32_vfmaddpd256 ((__v4df) __A, - -(__v4df) __B, - -(__v4df) __C), - (__v4df) __C); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __A); -} - -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) -{ - return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, - __builtin_ia32_vfmaddps ((__v4sf) __A, - -(__v4sf) __B, - -(__v4sf) __C), - (__v4sf) __C); -} - -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __A); -} - -static __inline__ __m256 
__DEFAULT_FN_ATTRS256 -_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) -{ - return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, - __builtin_ia32_vfmaddps256 ((__v8sf) __A, - -(__v8sf) __B, - -(__v8sf) __C), - (__v8sf) __C); -} - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 
__DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, (__v4si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, (__v8si) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { +static __inline__ __m128 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, (__v8sf) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, (__v2di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, (__v4di) __A); @@ -1905,57 +1706,57 @@ _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), 
(__v8sf)_mm256_setzero_ps()); @@ -1990,30 +1791,30 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { (__v4si)_mm_setzero_si128()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)_mm_setzero_ps()); @@ -2316,133 +2117,133 @@ _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_cvtepu32_pd (__m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu32_pd(__m128i __A) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR 
+_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_pd (__m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_pd(__m128i __A) { return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_cvtepu32_ps (__m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepu32_ps(__m128i __A) { return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), 
(__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_ps (__m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_ps(__m256i __A) { return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_div_pd(__m256d __W, __mmask8 __U, 
__m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), @@ -2796,499 +2597,499 @@ _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_abs_epi64 (__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_abs_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_abs((__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi64 (__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_abs((__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, 
__B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_max_epi64 (__m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_max_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epi64 (__m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epi64(__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_max_epu64 (__m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_max_epu64(__m128i __A, __m128i 
__B) { return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_max_epu64 (__m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_epu64(__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_min_epi64 (__m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_min_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epi64 (__m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epi64(__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_min_epu64 (__m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_min_epu64(__m128i __A, __m128i __B) { return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_min_epu64 (__m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_epu64(__m256i __A, __m256i __B) { return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { +static __inline__ __m256i 
__DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -3691,69 +3492,69 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128d __DEFAULT_FN_ATTRS128 + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)__W); } - static __inline__ __m128d __DEFAULT_FN_ATTRS128 + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)__W); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)__W); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)__W); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, (__v4si)__B); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3761,7 +3562,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4si)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3769,7 +3570,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4si)__I); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3777,13 +3578,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4si)_mm_setzero_si128()); } 
- static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, (__v8si) __B); } - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3791,7 +3592,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8si)__A); } - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3799,7 +3600,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8si)__I); } - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3807,40 +3608,43 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128d __DEFAULT_FN_ATTRS128 + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, (__v2df)__B); } - static __inline__ __m128d __DEFAULT_FN_ATTRS128 - _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR + _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, + __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)__A); } - static __inline__ 
__m128d __DEFAULT_FN_ATTRS128 - _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR + _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, + __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)(__m128d)__I); } - static __inline__ __m128d __DEFAULT_FN_ATTRS128 - _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { + static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR + _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, + __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, (__v4df)__B); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3848,7 +3652,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4df)__A); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3856,7 +3660,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4df)(__m256d)__I); } - static __inline__ __m256d __DEFAULT_FN_ATTRS256 + static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3864,47 
+3668,48 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, (__v4sf)__B); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)__A); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)(__m128)__I); } - static __inline__ __m128 __DEFAULT_FN_ATTRS128 + static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, (__v8sf) __B); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 - _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR + _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, + __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), (__v8sf)__A); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 
__DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, @@ -3912,7 +3717,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8sf)(__m256)__I); } - static __inline__ __m256 __DEFAULT_FN_ATTRS256 + static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, @@ -3920,13 +3725,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, (__v2di)__B); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3934,7 +3739,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v2di)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3942,7 +3747,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v2di)__I); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 + static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3950,14 +3755,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v2di)_mm_setzero_si128()); } - - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + 
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, (__v4di) __B); } - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3965,7 +3769,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4di)__A); } - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3973,7 +3777,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4di)__I); } - static __inline__ __m256i __DEFAULT_FN_ATTRS256 + static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4355,13 +4159,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { (__v4di)_mm256_rol_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4369,7 +4173,7 @@ _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4377,13 +4181,13 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4391,7 +4195,7 @@ _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4399,13 +4203,13 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4413,7 +4217,7 @@ _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4421,13 +4225,13 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4435,7 +4239,7 @@ _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4495,33 +4299,29 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__v4di)_mm256_ror_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sll_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sll_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sll_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sll_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); @@ -4543,49 +4343,44 @@ _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, (int)__B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, 
__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sll_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sll_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -4607,29 +4402,28 @@ _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, (int)__B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int 
__B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, (int)__B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4637,7 +4431,7 @@ _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4645,13 +4439,13 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4659,7 +4453,7 @@ _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectd_256(__U, @@ -4667,13 +4461,13 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4681,7 +4475,7 @@ _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4689,13 +4483,13 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4703,7 +4497,7 @@ _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectq_256(__U, @@ -4711,7 +4505,7 @@ _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4719,7 +4513,7 @@ _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4727,7 +4521,7 @@ _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4735,7 +4529,7 @@ _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4743,7 +4537,7 @@ _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4751,7 +4545,7 @@ _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4759,7 +4553,7 @@ _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4767,7 +4561,7 @@ _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4775,7 +4569,7 @@ _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4783,7 +4577,7 @@ _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4791,7 +4585,7 @@ _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4799,7 +4593,7 @@ _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i 
__X, __m256i __Y) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4807,7 +4601,7 @@ _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4815,7 +4609,7 @@ _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4823,7 +4617,7 @@ _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4831,7 +4625,7 @@ _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4839,33 +4633,29 @@ _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR 
+_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srl_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srl_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srl_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srl_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); @@ -4887,49 +4677,44 @@ _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, (int)__B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
+_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srl_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srl_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -4951,23 +4736,22 @@ _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
+_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, (int)__B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, (int)__B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4975,7 +4759,7 @@ _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4983,7 +4767,7 @@ _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4991,7 +4775,7 @@ _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4999,13 +4783,13 @@ _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()); } 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -5013,7 +4797,7 @@ _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -5021,13 +4805,13 @@ _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -5035,7 +4819,7 @@ _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -5296,69 +5080,55 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) -{ - return 
(__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)__O); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)_mm_setzero_si128()); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)__O); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256()); } - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) __O); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ - return (__m256i) __builtin_ia32_selectq_256(__M, - (__v4di) _mm256_set1_epi64x(__A), - (__v4di) _mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256()); } #define _mm_fixupimm_pd(A, B, C, imm) \ @@ -5805,130 +5575,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) (__mmask8) __U); } - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)_mm_setzero_pd()); } 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 
__DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); @@ -6078,65 +5831,57 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)_mm256_setzero_ps())) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return 
(__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)__W); 
} -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)_mm256_setzero_ps()); @@ -6250,282 +5995,251 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), 
(__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 
-_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static 
__inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sra_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sra_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sra_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sra_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, (int)__B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, (int)__B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, + unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, (int)__B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, (int)__B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_sra_epi64(__m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_sra_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 
-_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_sra_epi64(__A, __B), \ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_sra_epi64(__A, __B), \ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sra_epi64(__m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sra_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_sra_epi64(__A, __B), \ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_sra_epi64(__A, __B), \ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_srai_epi64(__m128i __A, unsigned int __imm) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_srai_epi64(__m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); } -static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64( + __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi64(__m256i __A, unsigned int __imm) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi64(__m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, - unsigned int __imm) -{ + unsigned int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_srai_epi64(__A, __imm), \ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ (__v4di)_mm256_srai_epi64(__A, __imm), \ (__v4di)_mm256_setzero_si256()); @@ -6792,159 +6506,139 @@ _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_broadcast_f32x4(__m128 __A) -{ +static __inline__ __m256 
__DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcast_f32x4(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcast_i32x4(__m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcast_i32x4(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) -{ +static 
__inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastd_epi32 
(__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) 
_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) _mm256_setzero_si256()); @@ -7536,9 +7230,8 @@ _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi32_epi8 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi32_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); @@ -7566,9 +7259,8 @@ _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi8 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi8(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v8si)__A, __v8qi), (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, @@ -7576,8 +7268,7 @@ _mm256_cvtepi32_epi8 (__m256i __A) } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) -{ +_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { 
return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } @@ -7596,9 +7287,8 @@ _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi32_epi16 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi32_epi16(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); @@ -7625,9 +7315,8 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi16 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi16(__m256i __A) { return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } @@ -7652,9 +7341,8 @@ _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi8 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); @@ -7681,9 +7369,8 @@ _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi8 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi8(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); @@ -7710,9 
+7397,8 @@ _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi32 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi32(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } @@ -7738,23 +7424,20 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi32 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi32(__m256i __A) { return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)_mm_setzero_si128()); @@ -7766,9 +7449,8 @@ _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi16 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi16(__m128i __A) { return (__m128i)__builtin_shufflevector( 
__builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3); @@ -7796,9 +7478,8 @@ _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi16 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi16(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); @@ -7825,11 +7506,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -#define _mm256_extractf32x4_ps(A, imm) \ - ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1)) +#define _mm256_extractf32x4_ps(A, imm) \ + ((__m128)__builtin_ia32_extractf32x4_256_mask( \ + (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \ + (__mmask8) - 1)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ @@ -7843,11 +7523,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) -#define _mm256_extracti32x4_epi32(A, imm) \ - ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1)) +#define _mm256_extracti32x4_epi32(A, imm) \ + ((__m128i)__builtin_ia32_extracti32x4_256_mask( \ + (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \ + (__mmask8) - 1)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ @@ -8083,47 +7762,41 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4di)_mm256_permutex_epi64((X), (C)), \ 
(__v4di)_mm256_setzero_si256())) -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_permutexvar_pd (__m256i __X, __m256d __Y) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutexvar_pd(__m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, - __m256d __Y) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, + __m256d __Y) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutexvar_pd(__X, __Y), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutexvar_pd(__X, __Y), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_permutexvar_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_permutexvar_epi64(__X, __Y), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR 
+_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_permutexvar_epi64(__X, __Y), (__v4di)__W); @@ -8131,17 +7804,15 @@ _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutexvar_ps(__X, __Y), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutexvar_ps(__X, __Y), (__v8sf)_mm256_setzero_ps()); @@ -8149,18 +7820,16 @@ _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) -{ + __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_permutexvar_epi32(__X, __Y), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_permutexvar_epi32(__X, __Y), 
(__v8si)_mm256_setzero_si256()); @@ -8222,65 +7891,57 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) -{ 
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)_mm256_setzero_ps()); @@ -8306,68 +7967,52 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) (__v4si)_mm_shuffle_epi32((A), (I)), \ (__v4si)_mm_setzero_si128())) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) -{ - return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, - (__v2df) __A, - (__v2df) __W); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A, + (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_mov_pd (__mmask8 __U, __m128d __A) -{ - return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, - (__v2df) __A, - (__v2df) _mm_setzero_pd ()); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A, + (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 
-{ - return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, - (__v4df) __A, - (__v4df) __W); +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A, + (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) -{ - return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, - (__v4df) __A, - (__v4df) _mm256_setzero_pd ()); +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A, + (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ - return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, - (__v4sf) __A, - (__v4sf) __W); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A, + (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_mov_ps (__mmask8 __U, __m128 __A) -{ - return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, - (__v4sf) __A, - (__v4sf) _mm_setzero_ps ()); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A, + (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ - return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, - (__v8sf) __A, - (__v8sf) __W); +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A, + (__v8sf)__W); } -static 
__inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) -{ - return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, - (__v8sf) __A, - (__v8sf) _mm256_setzero_ps ()); +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A, + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -8430,8 +8075,9 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif /* __AVX512VLINTRIN_H */ diff --git a/lib/include/avx512vlvbmi2intrin.h b/lib/include/avx512vlvbmi2intrin.h index 77af2d5cbd..da295d2a12 100644 --- a/lib/include/avx512vlvbmi2intrin.h +++ b/lib/include/avx512vlvbmi2intrin.h @@ -17,13 +17,21 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512vbmi2,no-evex512"), \ + __target__("avx512vl,avx512vbmi2"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512vbmi2,no-evex512"), \ + __target__("avx512vl,avx512vbmi2"), \ __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { @@ -412,14 +420,14 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ (__v8hi)_mm_setzero_si128())) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, - (__v4di)__C); + return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__B, + (__v4du)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -427,7 +435,7 @@ _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -435,14 +443,14 @@ _mm256_maskz_shldv_epi64(__mmask8 
__U, __m256i __A, __m256i __B, __m256i __C) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, - (__v2di)__C); + return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__B, + (__v2du)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -450,7 +458,7 @@ _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -458,14 +466,14 @@ _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, - (__v8si)__C); + return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__B, + (__v8su)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -473,7 +481,7 @@ _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i 
__C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -481,14 +489,14 @@ _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, - (__v4si)__C); + return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__B, + (__v4su)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -496,7 +504,7 @@ _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -504,14 +512,14 @@ _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, - (__v16hi)__C); + return (__m256i)__builtin_elementwise_fshl((__v16hu)__A, (__v16hu)__B, + (__v16hu)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -519,7 +527,7 @@ _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static 
__inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -527,14 +535,14 @@ _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, - (__v8hi)__C); + return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__B, + (__v8hu)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -542,7 +550,7 @@ _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -550,14 +558,15 @@ _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, - (__v4di)__C); + // Ops __A and __B are swapped. 
+ return (__m256i)__builtin_elementwise_fshr((__v4du)__B, (__v4du)__A, + (__v4du)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -565,7 +574,7 @@ _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -573,14 +582,15 @@ _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, - (__v2di)__C); + // Ops __A and __B are swapped. 
+ return (__m128i)__builtin_elementwise_fshr((__v2du)__B, (__v2du)__A, + (__v2du)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -588,7 +598,7 @@ _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -596,14 +606,15 @@ _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, - (__v8si)__C); + // Ops __A and __B are swapped. 
+ return (__m256i)__builtin_elementwise_fshr((__v8su)__B, (__v8su)__A, + (__v8su)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -611,7 +622,7 @@ _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) (__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -619,14 +630,15 @@ _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, - (__v4si)__C); + // Ops __A and __B are swapped. 
+ return (__m128i)__builtin_elementwise_fshr((__v4su)__B, (__v4su)__A, + (__v4su)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -634,7 +646,7 @@ _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -642,14 +654,15 @@ _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, - (__v16hi)__C); + // Ops __A and __B are swapped. 
+ return (__m256i)__builtin_elementwise_fshr((__v16hu)__B, (__v16hu)__A, + (__v16hu)__C); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -657,7 +670,7 @@ _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) { return (__m256i)__builtin_ia32_selectw_256(__U, @@ -665,14 +678,15 @@ _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, - (__v8hi)__C); + // Ops __A and __B are swapped. 
+ return (__m128i)__builtin_elementwise_fshr((__v8hu)__B, (__v8hu)__A, + (__v8hu)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -680,7 +694,7 @@ _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_selectw_128(__U, @@ -688,8 +702,9 @@ _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) (__v8hi)_mm_setzero_si128()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/lib/include/avx512vlvnniintrin.h b/lib/include/avx512vlvnniintrin.h index d1e5cd9d69..4b8a199af3 100644 --- a/lib/include/avx512vlvnniintrin.h +++ b/lib/include/avx512vlvnniintrin.h @@ -17,11 +17,11 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512vnni,no-evex512"), \ + __target__("avx512vl,avx512vnni"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512vnni,no-evex512"), \ + __target__("avx512vl,avx512vnni"), \ __min_vector_width__(256))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with @@ -41,8 +41,8 @@ /// ENDFOR /// DST[MAX:256] := 0 /// \endcode -#define _mm256_dpbusd_epi32(S, A, B) \ - ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B))) +#define _mm256_dpbusd_epi32(S, A, B) \ + ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v32qu)(A), (__v32qi)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -61,8 +61,9 @@ /// ENDFOR /// DST[MAX:256] := 0 /// \endcode -#define _mm256_dpbusds_epi32(S, A, B) \ - ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) +#define _mm256_dpbusds_epi32(S, A, B) \ + ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v32qu)(A), \ + (__v32qi)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -79,8 +80,8 @@ /// ENDFOR /// DST[MAX:256] := 0 /// \endcode -#define _mm256_dpwssd_epi32(S, A, B) \ - ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B))) +#define _mm256_dpwssd_epi32(S, A, B) \ + ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v16hi)(A), (__v16hi)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -97,8 +98,9 @@ /// ENDFOR /// DST[MAX:256] := 0 /// \endcode -#define _mm256_dpwssds_epi32(S, A, B) \ - 
((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) +#define _mm256_dpwssds_epi32(S, A, B) \ + ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v16hi)(A), \ + (__v16hi)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -117,8 +119,8 @@ /// ENDFOR /// DST[MAX:128] := 0 /// \endcode -#define _mm_dpbusd_epi32(S, A, B) \ - ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) +#define _mm_dpbusd_epi32(S, A, B) \ + ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v16qu)(A), (__v16qi)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -137,8 +139,9 @@ /// ENDFOR /// DST[MAX:128] := 0 /// \endcode -#define _mm_dpbusds_epi32(S, A, B) \ - ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) +#define _mm_dpbusds_epi32(S, A, B) \ + ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v16qu)(A), \ + (__v16qi)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -155,8 +158,8 @@ /// ENDFOR /// DST[MAX:128] := 0 /// \endcode -#define _mm_dpwssd_epi32(S, A, B) \ - ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) +#define _mm_dpwssd_epi32(S, A, B) \ + ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v8hi)(A), (__v8hi)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -173,8 +176,8 @@ /// ENDFOR /// DST[MAX:128] := 0 /// \endcode -#define _mm_dpwssds_epi32(S, A, B) \ - ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) +#define _mm_dpwssds_epi32(S, A, B) \ + 
((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) diff --git a/lib/include/avx512vlvp2intersectintrin.h b/lib/include/avx512vlvp2intersectintrin.h index 63a31241a5..8cb33169e5 100644 --- a/lib/include/avx512vlvp2intersectintrin.h +++ b/lib/include/avx512vlvp2intersectintrin.h @@ -30,12 +30,12 @@ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512vp2intersect,no-evex512"), \ + __target__("avx512vl,avx512vp2intersect"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512vp2intersect,no-evex512"), \ + __target__("avx512vl,avx512vp2intersect"), \ __min_vector_width__(256))) /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between dwords in operands __a and __b. diff --git a/lib/include/avx512vnniintrin.h b/lib/include/avx512vnniintrin.h index 0fb381a12f..2ce88efe4a 100644 --- a/lib/include/avx512vnniintrin.h +++ b/lib/include/avx512vnniintrin.h @@ -16,14 +16,14 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vnni,evex512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), \ + __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -45,8 +45,8 @@ _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A, + (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -68,8 +68,8 @@ _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A, + (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -91,8 +91,8 @@ _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A, - (__v16si)__B); + return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A, + (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/include/avx512vp2intersectintrin.h b/lib/include/avx512vp2intersectintrin.h index 
16552cae3b..7d999960a5 100644 --- a/lib/include/avx512vp2intersectintrin.h +++ b/lib/include/avx512vp2intersectintrin.h @@ -30,8 +30,7 @@ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vp2intersect,evex512"), \ - __min_vector_width__(512))) + __target__("avx512vp2intersect"), __min_vector_width__(512))) /// Store, in an even/odd pair of mask registers, the indicators of the /// locations of value matches between dwords in operands __a and __b. diff --git a/lib/include/avx512vpopcntdqintrin.h b/lib/include/avx512vpopcntdqintrin.h index e24c2c5e1b..cc884fea41 100644 --- a/lib/include/avx512vpopcntdqintrin.h +++ b/lib/include/avx512vpopcntdqintrin.h @@ -16,19 +16,18 @@ #define __AVX512VPOPCNTDQINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vpopcntdq,evex512"), \ - __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr + __target__("avx512vpopcntdq"), \ + __min_vector_width__(512))) constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq"), __min_vector_width__(512))) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_popcnt_epi64(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v8du)__A); } @@ -43,8 +42,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_popcnt_epi32(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i 
__A) { return (__m512i)__builtin_elementwise_popcount((__v16su)__A); } diff --git a/lib/include/avx512vpopcntdqvlintrin.h b/lib/include/avx512vpopcntdqvlintrin.h index b6c819b0cb..9b26aa1e63 100644 --- a/lib/include/avx512vpopcntdqvlintrin.h +++ b/lib/include/avx512vpopcntdqvlintrin.h @@ -16,25 +16,28 @@ #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ + +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __target__("avx512vpopcntdq,avx512vl"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl"), \ + __min_vector_width__(256))) constexpr +#else +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vpopcntdq,avx512vl"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ + __target__("avx512vpopcntdq,avx512vl"), \ __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#else -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_popcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v2du)__A); } @@ -49,8 +52,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_popcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v4su)__A); } @@ -65,7 +67,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v4du)__A); } @@ -81,7 +83,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v8su)__A); } diff --git a/lib/include/avxifmaintrin.h b/lib/include/avxifmaintrin.h index 5c782d2a5b..30df01caed 100644 --- a/lib/include/avxifmaintrin.h +++ b/lib/include/avxifmaintrin.h @@ -15,12 +15,28 @@ #define __AVXIFMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ + __min_vector_width__(256))) constexpr +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(256))) +#endif + +#if !defined(__AVX512IFMA__) && defined(__AVXIFMA__) +#define _mm_madd52hi_epu64(X, Y, Z) _mm_madd52hi_avx_epu64(X, Y, Z) +#define _mm_madd52lo_epu64(X, Y, Z) _mm_madd52lo_avx_epu64(X, Y, Z) +#define _mm256_madd52hi_epu64(X, Y, Z) _mm256_madd52hi_avx_epu64(X, Y, Z) +#define _mm256_madd52lo_epu64(X, Y, Z) _mm256_madd52lo_avx_epu64(X, Y, Z) +#endif // must vex-encoding diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h index 8e497a9823..fbd20e5832 100644 --- a/lib/include/avxintrin.h +++ b/lib/include/avxintrin.h @@ -50,28 +50,19 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); #endif /* Define the default attributes for the functions in this file. 
*/ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ - __min_vector_width__(256))) -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ - __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avx"), \ __min_vector_width__(128))) -#endif #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif /* Arithmetic */ @@ -87,9 +78,8 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the sums of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_add_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_add_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a+(__v4df)__b); } @@ -105,9 +95,8 @@ _mm256_add_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the sums of both /// operands. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_add_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a+(__v8sf)__b); } @@ -123,9 +112,8 @@ _mm256_add_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the differences between /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_sub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_sub_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a-(__v4df)__b); } @@ -141,9 +129,8 @@ _mm256_sub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the differences between /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_sub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a-(__v8sf)__b); } @@ -160,9 +147,8 @@ _mm256_sub_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_addsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } @@ -179,9 +165,8 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_addsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } @@ -197,9 +182,8 @@ _mm256_addsub_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the divisor. /// \returns A 256-bit vector of [4 x double] containing the quotients of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_div_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_div_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a/(__v4df)__b); } @@ -215,9 +199,8 @@ _mm256_div_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the divisor. /// \returns A 256-bit vector of [8 x float] containing the quotients of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_div_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a/(__v8sf)__b); } @@ -236,9 +219,8 @@ _mm256_div_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the maximum values /// between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_max_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_max_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); } @@ -257,9 +239,8 @@ _mm256_max_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the maximum values /// between both operands. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_max_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_max_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); } @@ -278,9 +259,8 @@ _mm256_max_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the minimum values /// between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_min_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_min_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); } @@ -299,9 +279,8 @@ _mm256_min_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the minimum values /// between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_min_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_min_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); } @@ -317,9 +296,8 @@ _mm256_min_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the products of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_mul_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_mul_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a * (__v4df)__b); } @@ -335,9 +313,8 @@ _mm256_mul_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the products of both /// operands. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_mul_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a * (__v8sf)__b); } @@ -352,10 +329,8 @@ _mm256_mul_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_sqrt_pd(__m256d __a) -{ - return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); +static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a) { + return __builtin_elementwise_sqrt(__a); } /// Calculates the square roots of the values in a 256-bit vector of @@ -369,10 +344,8 @@ _mm256_sqrt_pd(__m256d __a) /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_sqrt_ps(__m256 __a) -{ - return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); +static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a) { + return __builtin_elementwise_sqrt(__a); } /// Calculates the reciprocal square roots of the values in a 256-bit @@ -555,7 +528,7 @@ _mm256_rcp_ps(__m256 __a) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a & (__v4du)__b); @@ -573,7 +546,7 @@ _mm256_and_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values between both operands. 
-static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a & (__v8su)__b); @@ -594,7 +567,7 @@ _mm256_and_ps(__m256 __a, __m256 __b) /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_pd(__m256d __a, __m256d __b) { return (__m256d)(~(__v4du)__a & (__v4du)__b); @@ -615,7 +588,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b) /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_ps(__m256 __a, __m256 __b) { return (__m256)(~(__v8su)__a & (__v8su)__b); @@ -633,7 +606,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a | (__v4du)__b); @@ -651,7 +624,7 @@ _mm256_or_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a | (__v8su)__b); @@ -669,7 +642,7 @@ _mm256_or_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the source operands. 
/// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a ^ (__v4du)__b); @@ -687,7 +660,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a ^ (__v8su)__b); @@ -711,9 +684,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b) /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hadd_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } @@ -734,9 +706,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hadd_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } @@ -757,9 +728,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. 
-static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } @@ -780,9 +750,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } @@ -810,9 +779,8 @@ _mm256_hsub_ps(__m256 __a, __m256 __b) /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline __m128d __DEFAULT_FN_ATTRS128 -_mm_permutevar_pd(__m128d __a, __m128i __c) -{ +static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } @@ -849,9 +817,8 @@ _mm_permutevar_pd(__m128d __a, __m128i __c) /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_permutevar_pd(__m256d __a, __m256i __c) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); } @@ -904,9 +871,8 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c) /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. 
-static __inline __m128 __DEFAULT_FN_ATTRS128 -_mm_permutevar_ps(__m128 __a, __m128i __c) -{ +static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } @@ -995,9 +961,8 @@ _mm_permutevar_ps(__m128 __a, __m128i __c) /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_permutevar_ps(__m256 __a, __m256i __c) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } @@ -1419,9 +1384,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// 64-bit element in operand \a __b is copied to the same position in the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) { return (__m256d)__builtin_ia32_blendvpd256( (__v4df)__a, (__v4df)__b, (__v4df)__c); } @@ -1447,9 +1411,8 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) /// corresponding 32-bit element in operand \a __b is copied to the same /// position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) { return (__m256)__builtin_ia32_blendvps256( (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); } @@ -2190,9 +2153,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// \param __a /// A 128-bit integer vector of [4 x i32]. 
/// \returns A 256-bit vector of [4 x double] containing the converted values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_cvtepi32_pd(__m128i __a) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtepi32_pd(__m128i __a) { return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } @@ -2205,9 +2167,8 @@ _mm256_cvtepi32_pd(__m128i __a) /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit vector of [8 x float] containing the converted values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_cvtepi32_ps(__m256i __a) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtepi32_ps(__m256i __a) { return (__m256)__builtin_convertvector((__v8si)__a, __v8sf); } @@ -2221,9 +2182,8 @@ _mm256_cvtepi32_ps(__m256i __a) /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit vector of [4 x float] containing the converted values. -static __inline __m128 __DEFAULT_FN_ATTRS -_mm256_cvtpd_ps(__m256d __a) -{ +static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtpd_ps(__m256d __a) { return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); } @@ -2256,9 +2216,8 @@ _mm256_cvtps_epi32(__m256 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit vector of [4 x double] containing the converted values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_cvtps_pd(__m128 __a) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtps_pd(__m128 __a) { return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df); } @@ -2333,10 +2292,9 @@ _mm256_cvttps_epi32(__m256 __a) /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 64 bit double containing the first element of the input vector. -static __inline double __DEFAULT_FN_ATTRS -_mm256_cvtsd_f64(__m256d __a) -{ - return __a[0]; +static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsd_f64(__m256d __a) { + return __a[0]; } /// Returns the first element of the input vector of [8 x i32]. 
@@ -2349,11 +2307,10 @@ _mm256_cvtsd_f64(__m256d __a) /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 32 bit integer containing the first element of the input vector. -static __inline int __DEFAULT_FN_ATTRS -_mm256_cvtsi256_si32(__m256i __a) -{ - __v8si __b = (__v8si)__a; - return __b[0]; +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsi256_si32(__m256i __a) { + __v8si __b = (__v8si)__a; + return __b[0]; } /// Returns the first element of the input vector of [8 x float]. @@ -2366,10 +2323,9 @@ _mm256_cvtsi256_si32(__m256i __a) /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 32 bit float containing the first element of the input vector. -static __inline float __DEFAULT_FN_ATTRS -_mm256_cvtss_f32(__m256 __a) -{ - return __a[0]; +static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtss_f32(__m256 __a) { + return __a[0]; } /* Vector replicate */ @@ -2392,7 +2348,7 @@ _mm256_cvtss_f32(__m256 __a) /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movehdup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7); @@ -2417,7 +2373,7 @@ _mm256_movehdup_ps(__m256 __a) /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_moveldup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6); @@ -2439,7 +2395,7 @@ _mm256_moveldup_ps(__m256 __a) /// the return value. /// \returns A 256-bit vector of [4 x double] containing the moved and /// duplicated values. 
-static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movedup_pd(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2); @@ -2462,9 +2418,8 @@ _mm256_movedup_pd(__m256d __a) /// Bits [127:64] are written to bits [127:64] of the return value. \n /// Bits [255:192] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_unpackhi_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpackhi_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } @@ -2484,9 +2439,8 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b) /// Bits [63:0] are written to bits [127:64] of the return value. \n /// Bits [191:128] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_unpacklo_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpacklo_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } @@ -2511,9 +2465,8 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b) /// Bits [223:192] are written to bits [191:160] of the return value. \n /// Bits [255:224] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_unpackhi_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpackhi_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } @@ -2538,9 +2491,8 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b) /// Bits [159:128] are written to bits [191:160] of the return value. \n /// Bits [191:160] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_unpacklo_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpacklo_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } @@ -2568,9 +2520,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testz_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } @@ -2597,9 +2548,8 @@ _mm_testz_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testc_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } @@ -2627,9 +2577,8 @@ _mm_testc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. 
-static __inline int __DEFAULT_FN_ATTRS128 -_mm_testnzc_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } @@ -2656,9 +2605,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testz_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } @@ -2685,9 +2633,8 @@ _mm_testz_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testc_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } @@ -2715,9 +2662,8 @@ _mm_testc_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testnzc_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } @@ -2744,9 +2690,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a, + __m256d __b) { return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } @@ -2773,9 +2718,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the CF flag. 
-static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a, + __m256d __b) { return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } @@ -2803,9 +2747,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testnzc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } @@ -2832,9 +2775,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } @@ -2861,9 +2803,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } @@ -2891,9 +2832,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. 
-static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } @@ -2917,9 +2857,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit integer vector. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testz_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } @@ -2943,9 +2882,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } @@ -2970,9 +2908,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testnzc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); } @@ -2989,9 +2926,8 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x double] containing the double-precision /// floating point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [3:0]. 
-static __inline int __DEFAULT_FN_ATTRS -_mm256_movemask_pd(__m256d __a) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); } @@ -3007,9 +2943,8 @@ _mm256_movemask_pd(__m256d __a) /// A 256-bit vector of [8 x float] containing the single-precision floating /// point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [7:0]. -static __inline int __DEFAULT_FN_ATTRS -_mm256_movemask_ps(__m256 __a) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); } @@ -3666,9 +3601,7 @@ _mm256_undefined_pd(void) /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [8 x float] containing undefined values. -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_undefined_ps(void) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) { return (__m256)__builtin_ia32_undef256(); } @@ -3777,7 +3710,7 @@ _mm256_set_ps(float __a, float __b, float __c, float __d, /// \param __i7 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { @@ -3825,7 +3758,7 @@ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, /// \param __w00 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \returns An initialized 256-bit integer vector. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -3908,7 +3841,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, /// \param __b00 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -3943,7 +3876,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, /// \param __d /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a }; @@ -4044,7 +3977,7 @@ _mm256_setr_ps(float __a, float __b, float __c, float __d, /// \param __i7 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { @@ -4092,7 +4025,7 @@ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, /// \param __w00 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \returns An initialized 256-bit integer vector. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, @@ -4177,7 +4110,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, /// \param __b00 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, @@ -4210,7 +4143,7 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, /// \param __d /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \returns An initialized 256-bit integer vector. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { return _mm256_set_epi64x(__d, __c, __b, __a); @@ -4267,7 +4200,7 @@ _mm256_set1_ps(float __w) /// A 32-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [8 x i32]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i) { return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i); @@ -4285,7 +4218,7 @@ _mm256_set1_epi32(int __i) /// A 16-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [16 x i16]. 
-static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi16(short __w) { return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w, @@ -4303,7 +4236,7 @@ _mm256_set1_epi16(short __w) /// An 8-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [32 x i8]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi8(char __b) { return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, @@ -4324,7 +4257,7 @@ _mm256_set1_epi8(char __b) /// A 64-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [4 x i64]. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi64x(long long __q) { return _mm256_set_epi64x(__q, __q, __q, __q); @@ -4379,7 +4312,7 @@ _mm256_setzero_si256(void) { /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd_ps(__m256d __a) { return (__m256)__a; @@ -4396,7 +4329,7 @@ _mm256_castpd_ps(__m256d __a) /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd_si256(__m256d __a) { return (__m256i)__a; @@ -4413,7 +4346,7 @@ _mm256_castpd_si256(__m256d __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. 
-static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps_pd(__m256 __a) { return (__m256d)__a; @@ -4430,7 +4363,7 @@ _mm256_castps_pd(__m256 __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps_si256(__m256 __a) { return (__m256i)__a; @@ -4447,7 +4380,7 @@ _mm256_castps_si256(__m256 __a) /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_ps(__m256i __a) { return (__m256)__a; @@ -4464,7 +4397,7 @@ _mm256_castsi256_ps(__m256i __a) /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_pd(__m256i __a) { return (__m256d)__a; @@ -4481,7 +4414,7 @@ _mm256_castsi256_pd(__m256i __a) /// A 256-bit floating-point vector of [4 x double]. /// \returns A 128-bit floating-point vector of [2 x double] containing the /// lower 128 bits of the parameter. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castpd256_pd128(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); @@ -4498,7 +4431,7 @@ _mm256_castpd256_pd128(__m256d __a) /// A 256-bit floating-point vector of [8 x float]. /// \returns A 128-bit floating-point vector of [4 x float] containing the /// lower 128 bits of the parameter. 
-static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps256_ps128(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); @@ -4514,7 +4447,7 @@ _mm256_castps256_ps128(__m256 __a) /// A 256-bit integer vector. /// \returns A 128-bit integer vector containing the lower 128 bits of the /// parameter. -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castsi256_si128(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); @@ -4598,9 +4531,8 @@ _mm256_castsi128_si256(__m128i __a) /// A 128-bit vector of [2 x double]. /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits /// contain the value of the parameter. The upper 128 bits are set to zero. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_zextpd128_pd256(__m128d __a) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_zextpd128_pd256(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3); } @@ -4616,9 +4548,8 @@ _mm256_zextpd128_pd256(__m128d __a) /// A 128-bit vector of [4 x float]. /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits /// contain the value of the parameter. The upper 128 bits are set to zero. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_zextps128_ps256(__m128 __a) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_zextps128_ps256(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7); } @@ -4634,9 +4565,8 @@ _mm256_zextps128_ps256(__m128 __a) /// A 128-bit integer vector. /// \returns A 256-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 128 bits are set to zero. 
-static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_zextsi128_si256(__m128i __a) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_zextsi128_si256(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3); } @@ -4851,9 +4781,8 @@ _mm256_zextsi128_si256(__m128i __a) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_set_m128 (__m128 __hi, __m128 __lo) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_set_m128(__m128 __hi, __m128 __lo) { return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7); } @@ -4872,9 +4801,8 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_set_m128d (__m128d __hi, __m128d __lo) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_set_m128d(__m128d __hi, __m128d __lo) { return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3); } @@ -4892,9 +4820,8 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo) /// A 128-bit integer vector to be copied to the lower 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_set_m128i (__m128i __hi, __m128i __lo) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_set_m128i(__m128i __hi, __m128i __lo) { return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3); } @@ -4915,9 +4842,8 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_setr_m128 (__m128 __lo, __m128 __hi) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setr_m128(__m128 __lo, __m128 __hi) { return _mm256_set_m128(__hi, __lo); } @@ -4938,9 +4864,8 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi) /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_setr_m128d (__m128d __lo, __m128d __hi) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setr_m128d(__m128d __lo, __m128d __hi) { return (__m256d)_mm256_set_m128d(__hi, __lo); } @@ -4959,9 +4884,8 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi) /// A 128-bit integer vector to be copied to the upper 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_setr_m128i (__m128i __lo, __m128i __hi) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setr_m128i(__m128i __lo, __m128i __hi) { return (__m256i)_mm256_set_m128i(__hi, __lo); } diff --git a/lib/include/avxvnniint16intrin.h b/lib/include/avxvnniint16intrin.h index 805d249911..98d94ee3fc 100644 --- a/lib/include/avxvnniint16intrin.h +++ b/lib/include/avxvnniint16intrin.h @@ -16,9 +16,10 @@ #define __AVXVNNIINT16INTRIN_H /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. +/// corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit +/// results in \a dst. 
/// /// \headerfile /// @@ -40,19 +41,21 @@ /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode #define _mm_dpwsud_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v8hi)(__A), \ + (__v8hu)(__B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. +/// corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit +/// results in \a dst. 
/// /// \headerfile /// @@ -74,20 +77,21 @@ /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) -/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 +/// tmp2.dword := +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode #define _mm256_dpwsud_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v16hi)(__A), \ + (__v16hu)(__B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W with signed saturation, and store the packed -/// 32-bit results in \a dst. +/// corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W with signed saturation, and store +/// the packed 32-bit results in \a dst. 
/// /// \headerfile /// @@ -109,20 +113,22 @@ /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with #define _mm_dpwsuds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v8hi)(__A), \ + (__v8hu)(__B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W with signed saturation, and store the packed -/// 32-bit results in \a dst. +/// corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W with signed saturation, and store +/// the packed 32-bit results in \a dst. 
/// /// \headerfile /// @@ -144,19 +150,21 @@ /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode #define _mm256_dpwsuds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v16hi)(__A), \ + (__v16hu)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding signed 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit +/// results in \a dst. 
/// /// \headerfile /// @@ -178,19 +186,21 @@ /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode #define _mm_dpwusd_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v8hu)(__A), \ + (__v8hi)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding signed 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit +/// results in \a dst. 
/// /// \headerfile /// @@ -212,20 +222,21 @@ /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode #define _mm256_dpwusd_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v16hu)(__A), \ + (__v16hi)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W with signed saturation, and store the packed -/// 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding signed 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W with signed saturation, and +/// store the packed 32-bit results in \a dst. /// /// \headerfile /// @@ -233,7 +244,7 @@ /// __m128i _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// -/// This intrinsic corresponds to the \c VPDPWSUDS instruction. +/// This intrinsic corresponds to the \c VPDPWUSDS instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. 
@@ -247,20 +258,21 @@ /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode #define _mm_dpwusds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v8hu)(__A), \ + (__v8hi)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W with signed saturation, and store the packed -/// 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding signed 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W with signed saturation, and +/// store the packed 32-bit results in \a dst. /// /// \headerfile /// @@ -268,7 +280,7 @@ /// __m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// -/// This intrinsic corresponds to the \c VPDPWSUDS instruction. +/// This intrinsic corresponds to the \c VPDPWUSDS instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. 
@@ -282,19 +294,21 @@ /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode #define _mm256_dpwusds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v16hu)(__A), \ + (__v16hi)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit +/// results in \a dst. /// /// \headerfile /// @@ -305,30 +319,32 @@ /// This intrinsic corresponds to the \c VPDPWUUD instruction. /// /// \param __W -/// A 128-bit vector of [4 x unsigned int]. +/// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns -/// A 128-bit vector of [4 x unsigned int]. +/// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode #define _mm_dpwuud_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v8hu)(__A), \ + (__v8hu)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit +/// results in \a dst. /// /// \headerfile /// @@ -339,31 +355,32 @@ /// This intrinsic corresponds to the \c VPDPWUUD instruction. /// /// \param __W -/// A 256-bit vector of [8 x unsigned int]. +/// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns -/// A 256-bit vector of [8 x unsigned int]. +/// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode #define _mm256_dpwuud_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v16hu)(__A), \ + (__v16hu)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W with signed saturation, and store the packed -/// 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W with signed saturation, and store +/// the packed 32-bit results in \a dst. /// /// \headerfile /// @@ -371,34 +388,35 @@ /// __m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// -/// This intrinsic corresponds to the \c VPDPWSUDS instruction. +/// This intrinsic corresponds to the \c VPDPWUUDS instruction. /// /// \param __W -/// A 128-bit vector of [4 x unsigned int]. +/// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns -/// A 128-bit vector of [4 x unsigned int]. +/// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode #define _mm_dpwuuds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v8hu)(__A), \ + (__v8hu)(__B))) -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate -/// signed 16-bit results. Sum these 2 results with the corresponding -/// 32-bit integer in \a __W with signed saturation, and store the packed -/// 32-bit results in \a dst. +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 +/// intermediate signed 16-bit results. Sum these 2 results with the +/// corresponding 32-bit integer in \a __W with signed saturation, and store +/// the packed 32-bit results in \a dst. /// /// \headerfile /// @@ -406,27 +424,28 @@ /// __m256i _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// -/// This intrinsic corresponds to the \c VPDPWSUDS instruction. +/// This intrinsic corresponds to the \c VPDPWUUDS instruction. /// /// \param __W -/// A 256-bit vector of [8 x unsigned int]. +/// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns -/// A 256-bit vector of [8 x unsigned int]. +/// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) +/// tmp2.dword := +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode #define _mm256_dpwuuds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v16hu)(__A), \ + (__v16hu)(__B))) #endif // __AVXVNNIINT16INTRIN_H diff --git a/lib/include/avxvnniint8intrin.h b/lib/include/avxvnniint8intrin.h index c211620c68..858b66b138 100644 --- a/lib/include/avxvnniint8intrin.h +++ b/lib/include/avxvnniint8intrin.h @@ -14,6 +14,7 @@ #ifndef __AVXVNNIINT8INTRIN_H #define __AVXVNNIINT8INTRIN_H +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -44,10 +45,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbssd_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. 
Sum these 4 results with the corresponding @@ -78,10 +81,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbssd_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -94,7 +99,7 @@ /// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSSDS instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. @@ -113,10 +118,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbssds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -129,7 +136,7 @@ /// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSSDS instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. 
@@ -148,10 +155,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbssds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qi)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -163,7 +172,7 @@ /// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. @@ -182,10 +191,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbsud_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -197,7 +208,7 @@ /// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. 
@@ -216,10 +227,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbsud_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -232,7 +245,7 @@ /// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUDS instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. @@ -251,10 +264,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbsuds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v16qi)(__A), \ + (__v16qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -267,7 +282,7 @@ /// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBSUDS instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. 
@@ -286,10 +301,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbsuds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v32qi)(__A), \ + (__v32qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -301,7 +318,7 @@ /// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBUUD instruction. /// /// \param __A /// A 128-bit vector of [16 x unsigned char]. @@ -320,10 +337,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbuud_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v16qu)(__A), \ + (__v16qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -335,7 +354,7 @@ /// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// -/// This intrinsic corresponds to the \c VPDPBSSD instruction. +/// This intrinsic corresponds to the \c VPDPBUUD instruction. /// /// \param __A /// A 256-bit vector of [32 x unsigned char]. 
@@ -354,10 +373,12 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbuud_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v32qu)(__A), \ + (__v32qu)(__B))) +// clang-format off /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -389,10 +410,12 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode +// clang-format on #define _mm_dpbuuds_epi32(__W, __A, __B) \ - ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v4si)(__A), \ - (__v4si)(__B))) + ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v16qu)(__A), \ + (__v16qu)(__B))) +// clang-format off /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. 
Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed @@ -423,8 +446,9 @@ /// ENDFOR /// dst[MAX:256] := 0 /// \endcode +// clang-format on #define _mm256_dpbuuds_epi32(__W, __A, __B) \ - ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v8si)(__A), \ - (__v8si)(__B))) + ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v32qu)(__A), \ + (__v32qu)(__B))) #endif // __AVXVNNIINT8INTRIN_H diff --git a/lib/include/avxvnniintrin.h b/lib/include/avxvnniintrin.h index b7de562b57..1d2e8c906e 100644 --- a/lib/include/avxvnniintrin.h +++ b/lib/include/avxvnniintrin.h @@ -63,7 +63,8 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A, + (__v32qi)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with @@ -86,7 +87,8 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A, + (__v32qi)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with @@ -107,7 +109,8 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A, + (__v16hi)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with @@ -128,7 +131,8 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i 
__A, __m256i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A, + (__v16hi)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with @@ -151,7 +155,8 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A, + (__v16qi)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with @@ -174,7 +179,8 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A, + (__v16qi)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with @@ -195,7 +201,8 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A, + (__v8hi)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with @@ -216,7 +223,8 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, 
(__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A, + (__v8hi)__B); } #undef __DEFAULT_FN_ATTRS128 diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h index 52addb7bfa..156425c756 100644 --- a/lib/include/cpuid.h +++ b/lib/include/cpuid.h @@ -253,10 +253,6 @@ #define bit_RDPRU 0x00000010 #define bit_WBNOINVD 0x00000200 -/* Features in %ebx for leaf 0x24 */ -#define bit_AVX10_256 0x00020000 -#define bit_AVX10_512 0x00040000 - #ifdef __i386__ #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ @@ -282,6 +278,24 @@ : "0"(__leaf), "2"(__count)) #endif +/// Queries the processor to determine the highest supported \c CPUID leaf. +/// This intrinsic is only available on x86 and x64. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CPUID instruction. +/// +/// \param __leaf +/// \a __leaf can be either 0x0 or 0x80000000. If \a __leaf == 0x0, the +/// highest supported value for basic \c CPUID information is returned. +/// If \a __leaf == 0x80000000, the highest supported value for extended +/// \c CPUID information is returned. +/// \param __sig +/// If the \a __sig pointer is non-null, the first four bytes of the +/// signature (as found in the \c EBX register) are returned in the +/// location pointed to by \a __sig. +/// \returns Returns 0 if \c CPUID is not supported; otherwise returns the value +/// that \c CPUID returns in the \c EAX register. static __inline unsigned int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig) { @@ -315,6 +329,32 @@ static __inline unsigned int __get_cpuid_max (unsigned int __leaf, return __eax; } +/// For the requested \c CPUID leaf, queries the processor for information +/// about the CPU type and CPU features (such as processor vendor, supported +/// instruction sets, CPU capabilities, cache sizes, CPU model and family, and +/// other hardware details).
This intrinsic is only available on x86 and x64. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CPUID instruction. +/// +/// \param __leaf +/// An unsigned integer that identifies the level (also called "leaf") at +/// which the \c CPUID instruction will be executed. +/// \param __eax +/// A pointer to an integer that corresponds to the \c EAX register where +/// \c CPUID stores output results. +/// \param __ebx +/// A pointer to an integer that corresponds to the \c EBX register where +/// \c CPUID stores output results. +/// \param __ecx +/// A pointer to an integer that corresponds to the \c ECX register where +/// \c CPUID stores output results. +/// \param __edx +/// A pointer to an integer that corresponds to the \c EDX register where +/// \c CPUID stores output results. +/// \returns Returns 1 if the requested \c CPUID leaf is supported; otherwise +/// returns 0. static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) @@ -328,6 +368,36 @@ static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax, return 1; } +/// For the requested \c CPUID leaf and subleaf, queries the processor for +/// information about the CPU type and CPU features (such as processor vendor, +/// supported instruction sets, CPU capabilities, cache sizes, CPU model and +/// family, and other hardware details). This intrinsic is only available on +/// x86 and x64. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CPUID instruction. +/// +/// \param __leaf +/// An unsigned integer that identifies the level (also called "leaf") at +/// which the \c CPUID instruction will be executed. +/// \param __subleaf +/// An unsigned integer that identifies the sublevel (also called +/// "subleaf") at which the \c CPUID instruction will be executed. 
+/// \param __eax +/// A pointer to an integer that corresponds to the \c EAX register where +/// \c CPUID stores output results. +/// \param __ebx +/// A pointer to an integer that corresponds to the \c EBX register where +/// \c CPUID stores output results. +/// \param __ecx +/// A pointer to an integer that corresponds to the \c ECX register where +/// \c CPUID stores output results. +/// \param __edx +/// A pointer to an integer that corresponds to the \c EDX register where +/// \c CPUID stores output results. +/// \returns Returns 1 if the requested \c CPUID leaf is supported; otherwise +/// returns 0. static __inline int __get_cpuid_count (unsigned int __leaf, unsigned int __subleaf, unsigned int *__eax, unsigned int *__ebx, @@ -345,10 +415,37 @@ static __inline int __get_cpuid_count (unsigned int __leaf, // In some configurations, __cpuidex is defined as a builtin (primarily // -fms-extensions) which will conflict with the __cpuidex definition below. #if !(__has_builtin(__cpuidex)) +// In some cases, offloading will set the host as the aux triple and define the +// builtin. Given __has_builtin does not detect builtins on aux triples, we need +// to explicitly check for some offloading cases. +#if !defined(__NVPTX__) && !defined(__AMDGPU__) && !defined(__SPIRV__) +/// Executes the \c CPUID instruction with the specified leaf and subleaf +/// values, and returns the results from the CPU's registers. This intrinsic +/// is only available on x86 and x64. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CPUID instruction. +/// +/// \param __cpu_info +/// An output array of four integers: +///
    +///
  • \a __cpu_info[0] receives the value of the \c EAX register.
  • +///
  • \a __cpu_info[1] receives the value of the \c EBX register.
  • +///
  • \a __cpu_info[2] receives the value of the \c ECX register.
  • +///
  • \a __cpu_info[3] receives the value of the \c EDX register.
  • +///
+/// \param __leaf +/// An unsigned integer that identifies the level (also called the "leaf") +/// at which the \c CPUID instruction will be executed. +/// \param __subleaf +/// An unsigned integer that identifies the sublevel (also called the +/// "subleaf") at which the \c CPUID instruction will be executed. static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) { __cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2], __cpu_info[3]); } #endif +#endif #endif /* __CPUID_H */ diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h index 78e8a422db..61b35e9731 100644 --- a/lib/include/emmintrin.h +++ b/lib/include/emmintrin.h @@ -17,7 +17,6 @@ #include typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); -typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); typedef long long __m128i_u @@ -25,14 +24,9 @@ typedef long long __m128i_u /* Type defines. */ typedef double __v2df __attribute__((__vector_size__(16))); -typedef long long __v2di __attribute__((__vector_size__(16))); -typedef short __v8hi __attribute__((__vector_size__(16))); -typedef char __v16qi __attribute__((__vector_size__(16))); /* Unsigned types */ typedef unsigned long long __v2du __attribute__((__vector_size__(16))); -typedef unsigned short __v8hu __attribute__((__vector_size__(16))); -typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ @@ -49,15 +43,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); #endif /* Define the default attributes for the functions in this file. 
*/ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("sse2,no-evex512"), __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ __min_vector_width__(128))) -#endif #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr @@ -67,6 +55,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -250,8 +241,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, /// bits are copied from the upper 64 bits of operand \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); - return __extension__(__m128d){__c[0], __a[1]}; + return __extension__(__m128d){__builtin_elementwise_sqrt(__b[0]), __a[1]}; } /// Calculates the square root of the each of two values stored in a @@ -266,7 +256,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { - return __builtin_ia32_sqrtpd((__v2df)__a); + return __builtin_elementwise_sqrt(__a); } /// Compares lower 64-bit double-precision values of both operands, and @@ -310,8 +300,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the operands. 
/// \returns A 128-bit vector of [2 x double] containing the minimum values /// between both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } @@ -356,8 +346,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the maximum values /// between both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); } @@ -1288,7 +1278,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtpd_ps(__m128d __a) { return __builtin_ia32_cvtpd2ps((__v2df)__a); } @@ -1393,8 +1384,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the /// converted value from the second parameter. The upper 96 bits are copied /// from the upper 96 bits of the first parameter. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, - __m128d __b) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsd_ss(__m128 __a, __m128d __b) { return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } @@ -2068,8 +2059,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, /// A 128-bit vector of [16 x i8]. 
/// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } @@ -2089,8 +2080,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } @@ -2127,8 +2118,9 @@ _mm_add_epi32(__m128i __a, __m128i __b) { /// \param __b /// A 64-bit integer. /// \returns A 64-bit integer containing the sum of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) { - return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_si64(__m64 __a, + __m64 __b) { + return (__m64)(((__v1du)__a)[0] + ((__v1du)__b)[0]); } /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], @@ -2169,8 +2161,8 @@ _mm_add_epi64(__m128i __a, __m128i __b) { /// A 128-bit signed [16 x i8] vector. /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of /// both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_adds_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); } @@ -2191,8 +2183,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, /// A 128-bit signed [8 x i16] vector. 
/// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of /// both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_adds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); } @@ -2213,8 +2205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums /// of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_adds_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); } @@ -2235,8 +2227,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums /// of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_adds_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); } @@ -2254,9 +2246,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded /// averages of both parameters. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_avg_epu8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b); } /// Computes the rounded averages of corresponding elements of two @@ -2273,9 +2265,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded /// averages of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_avg_epu16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b); } /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] @@ -2298,8 +2290,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [4 x i32] vector containing the sums of products /// of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_madd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); } @@ -2317,8 +2309,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the greater value of /// each comparison. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_max_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); } @@ -2336,8 +2328,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of /// each comparison. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_max_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); } @@ -2355,8 +2347,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of /// each comparison. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_min_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); } @@ -2374,8 +2366,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of /// each comparison. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_min_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); } @@ -2393,8 +2385,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of /// each of the eight 32-bit products. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } @@ -2412,9 +2404,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits /// of each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, - __m128i __b) { - return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhi_epu16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmulhuw128((__v8hu)__a, (__v8hu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] @@ -2431,8 +2423,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of /// each of the eight 32-bit products. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mullo_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a * (__v8hu)__b); } @@ -2449,9 +2441,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, /// \param __b /// A 64-bit integer containing one of the source operands. /// \returns A 64-bit integer vector containing the product of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a), - (__v4si)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a), + (__v4si)__zext128(__b))); } /// Multiplies 32-bit unsigned integer values contained in the lower @@ -2467,8 +2460,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) { /// \param __b /// A [2 x i64] vector containing one of the source operands. /// \returns A [2 x i64] vector containing the product of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mul_epu32(__m128i __a, __m128i __b) { return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); } @@ -2505,8 +2498,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } @@ -2522,8 +2515,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } @@ -2557,8 +2550,9 @@ _mm_sub_epi32(__m128i __a, __m128i __b) { /// A 64-bit integer vector containing the subtrahend. /// \returns A 64-bit integer vector containing the difference of the values in /// the operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) { - return (__m64)((unsigned long long)__a - (unsigned long long)__b); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_si64(__m64 __a, + __m64 __b) { + return (__m64)(((__v1du)__a)[0] - ((__v1du)__b)[0]); } /// Subtracts the corresponding elements of two [2 x i64] vectors. @@ -2595,8 +2589,8 @@ _mm_sub_epi64(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_subs_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); } @@ -2617,8 +2611,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_subs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); } @@ -2638,8 +2632,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, /// A 128-bit integer vector containing the subtrahends. 
/// \returns A 128-bit integer vector containing the unsigned integer /// differences of the values in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_subs_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); } @@ -2659,8 +2653,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the unsigned integer /// differences of the values in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_subs_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); } @@ -2676,8 +2670,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise AND of the values /// in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_and_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a & (__v2du)__b); } @@ -2695,8 +2689,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, /// A 128-bit vector containing the right source operand. /// \returns A 128-bit integer vector containing the bitwise AND of the one's /// complement of the first operand and the values in the second operand. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_andnot_si128(__m128i __a, __m128i __b) { return (__m128i)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit integer vectors. @@ -2711,8 +2705,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise OR of the values /// in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_or_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a | (__v2du)__b); } @@ -2728,8 +2722,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the /// values in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_xor_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a ^ (__v2du)__b); } @@ -2751,11 +2745,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, /// \a a. /// \returns A 128-bit integer vector containing the left-shifted value. 
#define _mm_slli_si128(a, imm) \ - ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ + ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)(__m128i)(a), \ (int)(imm))) #define _mm_bslli_si128(a, imm) \ - ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ + ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)(__m128i)(a), \ (int)(imm))) /// Left-shifts each 16-bit value in the 128-bit integer vector operand @@ -2771,8 +2765,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_slli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); } @@ -2789,8 +2783,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sll_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); } @@ -2807,8 +2801,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_slli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); } @@ -2825,8 +2819,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sll_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); } @@ -2843,8 +2837,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_slli_epi64(__m128i __a, int __count) { return __builtin_ia32_psllqi128((__v2di)__a, __count); } @@ -2861,8 +2855,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sll_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); } @@ -2880,8 +2874,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srai_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); } @@ -2899,8 +2893,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sra_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); } @@ -2918,8 +2912,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srai_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); } @@ -2937,8 +2931,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sra_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); } @@ -2960,11 +2954,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, /// \a a. /// \returns A 128-bit integer vector containing the right-shifted value. #define _mm_srli_si128(a, imm) \ - ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ + ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v16qi)(__m128i)(a), \ (int)(imm))) #define _mm_bsrli_si128(a, imm) \ - ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ + ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v16qi)(__m128i)(a), \ (int)(imm))) /// Right-shifts each of 16-bit values in the 128-bit integer vector @@ -2980,8 +2974,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); } @@ -2998,8 +2992,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srl_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); } @@ -3016,8 +3010,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); } @@ -3034,8 +3028,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srl_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); } @@ -3052,8 +3046,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, - int __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srli_epi64(__m128i __a, int __count) { return __builtin_ia32_psrlqi128((__v2di)__a, __count); } @@ -3070,8 +3064,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, - __m128i __count) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_srl_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); } @@ -3089,8 +3083,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpeq_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qi)__a == (__v16qi)__b); } @@ -3108,8 +3102,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, /// \param __b /// A 128-bit integer vector. 
/// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpeq_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a == (__v8hi)__b); } @@ -3127,8 +3121,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpeq_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a == (__v4si)__b); } @@ -3147,8 +3141,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpgt_epi8(__m128i __a, __m128i __b) { /* This function always performs a signed comparison, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i)((__v16qs)__a > (__v16qs)__b); @@ -3169,8 +3163,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpgt_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a > (__v8hi)__b); } @@ -3189,8 +3183,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, /// \param __b /// A 128-bit integer vector. 
/// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpgt_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a > (__v4si)__b); } @@ -3209,8 +3203,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmplt_epi8(__m128i __a, __m128i __b) { return _mm_cmpgt_epi8(__b, __a); } @@ -3229,8 +3223,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmplt_epi16(__m128i __a, __m128i __b) { return _mm_cmpgt_epi16(__b, __a); } @@ -3249,8 +3243,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmplt_epi32(__m128i __a, __m128i __b) { return _mm_cmpgt_epi32(__b, __a); } @@ -3379,7 +3373,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { /// \param __a /// A 32-bit signed integer operand. /// \returns A 128-bit vector of [4 x i32]. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi32_si128(int __a) { return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; } @@ -3394,7 +3389,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { /// \param __a /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [2 x i64] containing the converted value. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi64_si128(long long __a) { return __extension__(__m128i)(__v2di){__a, 0}; } @@ -3409,7 +3405,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { /// A vector of [4 x i32]. The least significant 32 bits are moved to the /// destination. /// \returns A 32-bit signed integer containing the moved value. -static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi128_si32(__m128i __a) { __v4si __b = (__v4si)__a; return __b[0]; } @@ -3425,7 +3422,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { /// A vector of [2 x i64]. The least significant 64 bits are moved to the /// destination. /// \returns A 64-bit signed integer containing the moved value. -static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { +static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi128_si64(__m128i __a) { return __a[0]; } @@ -4161,8 +4159,8 @@ void _mm_mfence(void); /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); } @@ -4184,8 +4182,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values /// are written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packs_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); } @@ -4207,8 +4205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packus_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); } @@ -4282,7 +4280,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, /// A 128-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bits from each 8-bit element in \a __a, /// written to bits [15:0]. The other bits are assigned zeros. 
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movemask_epi8(__m128i __a) { return __builtin_ia32_pmovmskb128((__v16qi)__a); } @@ -4415,8 +4414,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { /// Bits [119:112] are written to bits [111:104] of the result. \n /// Bits [127:120] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpackhi_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector( (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); @@ -4443,8 +4442,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, /// Bits [111:96] are written to bits [95:80] of the result. \n /// Bits [127:112] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpackhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, 8 + 5, 6, 8 + 6, 7, 8 + 7); } @@ -4466,8 +4465,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, /// Bits [95:64] are written to bits [64:32] of the destination. \n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpackhi_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, 4 + 3); } @@ -4487,8 +4486,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, /// A 128-bit vector of [2 x i64]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpackhi_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); } @@ -4521,8 +4520,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, /// Bits [55:48] are written to bits [111:104] of the result. \n /// Bits [63:56] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpacklo_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector( (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); @@ -4550,8 +4549,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, /// Bits [47:32] are written to bits [95:80] of the result. \n /// Bits [63:48] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpacklo_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, 8 + 1, 2, 8 + 2, 3, 8 + 3); } @@ -4573,8 +4572,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, /// Bits [31:0] are written to bits [64:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpacklo_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, 4 + 1); } @@ -4594,8 +4593,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, /// A 128-bit vector of [2 x i64]. \n /// Bits [63:0] are written to bits [127:64] of the destination. \n /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpacklo_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); } @@ -4701,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) { /// be extracted. /// \returns The sign bits from each of the double-precision elements in \a __a, /// written to bits [1:0]. The remaining bits are assigned values of zero. 
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd((__v2df)__a); } diff --git a/lib/include/f16cintrin.h b/lib/include/f16cintrin.h index 94a662c1d9..b6ca7088d3 100644 --- a/lib/include/f16cintrin.h +++ b/lib/include/f16cintrin.h @@ -15,10 +15,21 @@ #define __F16CINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), \ + __min_vector_width__(256))) constexpr +#else +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), \ + __min_vector_width__(256))) +#endif /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, * but that's because icc can emulate these without f16c using a library call. 
@@ -38,9 +49,7 @@ static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { - __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; - __v4sf __r = __builtin_ia32_vcvtph2ps(__v); - return __r[0]; + return (float)__builtin_bit_cast(__fp16, __a); } /// Converts a 32-bit single-precision float value to a 16-bit @@ -109,7 +118,10 @@ _cvtsh_ss(unsigned short __a) static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) { - return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); + typedef __fp16 __v4fp16 __attribute__((__vector_size__(8))); + + __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3); + return (__m128) __builtin_convertvector((__v4fp16)__v, __v4sf); } /// Converts a 256-bit vector of [8 x float] into a 128-bit vector @@ -153,7 +165,9 @@ _mm_cvtph_ps(__m128i __a) static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) { - return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); + typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16))); + + return (__m256) __builtin_convertvector((__v8fp16)__a, __v8sf); } #undef __DEFAULT_FN_ATTRS128 diff --git a/lib/include/float.h b/lib/include/float.h index 84551af473..82974f6004 100644 --- a/lib/include/float.h +++ b/lib/include/float.h @@ -7,13 +7,21 @@ *===-----------------------------------------------------------------------=== */ -#ifndef __CLANG_FLOAT_H -#define __CLANG_FLOAT_H - #if defined(__MVS__) && __has_include_next() +#include <__float_header_macro.h> #include_next #else +#if !defined(__need_infinity_nan) +#define __need_float_float +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ + !defined(__STRICT_ANSI__) +#define __need_infinity_nan +#endif +#include <__float_header_macro.h> +#endif + +#ifdef __need_float_float /* If we're on MinGW, fall back to the system's float.h, which might have * additional definitions provided for Windows. 
* For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx @@ -26,162 +34,15 @@ # include_next -/* Undefine anything that we'll be redefining below. */ -# undef FLT_EVAL_METHOD -# undef FLT_ROUNDS -# undef FLT_RADIX -# undef FLT_MANT_DIG -# undef DBL_MANT_DIG -# undef LDBL_MANT_DIG -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - !defined(__STRICT_ANSI__) || \ - (defined(__cplusplus) && __cplusplus >= 201103L) || \ - (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) -# undef DECIMAL_DIG -# endif -# undef FLT_DIG -# undef DBL_DIG -# undef LDBL_DIG -# undef FLT_MIN_EXP -# undef DBL_MIN_EXP -# undef LDBL_MIN_EXP -# undef FLT_MIN_10_EXP -# undef DBL_MIN_10_EXP -# undef LDBL_MIN_10_EXP -# undef FLT_MAX_EXP -# undef DBL_MAX_EXP -# undef LDBL_MAX_EXP -# undef FLT_MAX_10_EXP -# undef DBL_MAX_10_EXP -# undef LDBL_MAX_10_EXP -# undef FLT_MAX -# undef DBL_MAX -# undef LDBL_MAX -# undef FLT_EPSILON -# undef DBL_EPSILON -# undef LDBL_EPSILON -# undef FLT_MIN -# undef DBL_MIN -# undef LDBL_MIN -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ - !defined(__STRICT_ANSI__) || \ - (defined(__cplusplus) && __cplusplus >= 201703L) || \ - (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) -# undef FLT_TRUE_MIN -# undef DBL_TRUE_MIN -# undef LDBL_TRUE_MIN -# undef FLT_DECIMAL_DIG -# undef DBL_DECIMAL_DIG -# undef LDBL_DECIMAL_DIG -# undef FLT_HAS_SUBNORM -# undef DBL_HAS_SUBNORM -# undef LDBL_HAS_SUBNORM -# endif -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ - !defined(__STRICT_ANSI__) -# undef FLT_NORM_MAX -# undef DBL_NORM_MAX -# undef LDBL_NORM_MAX -#endif #endif -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ - !defined(__STRICT_ANSI__) -# undef INFINITY -# undef NAN +#include <__float_float.h> +#undef __need_float_float #endif -/* Characteristics of floating point types, C99 5.2.4.2.2 */ - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - 
(defined(__cplusplus) && __cplusplus >= 201103L) -#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +#ifdef __need_infinity_nan +#include <__float_infinity_nan.h> +#undef __need_infinity_nan #endif -#define FLT_ROUNDS (__builtin_flt_rounds()) -#define FLT_RADIX __FLT_RADIX__ - -#define FLT_MANT_DIG __FLT_MANT_DIG__ -#define DBL_MANT_DIG __DBL_MANT_DIG__ -#define LDBL_MANT_DIG __LDBL_MANT_DIG__ - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ - !defined(__STRICT_ANSI__) || \ - (defined(__cplusplus) && __cplusplus >= 201103L) || \ - (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) -# define DECIMAL_DIG __DECIMAL_DIG__ -#endif - -#define FLT_DIG __FLT_DIG__ -#define DBL_DIG __DBL_DIG__ -#define LDBL_DIG __LDBL_DIG__ - -#define FLT_MIN_EXP __FLT_MIN_EXP__ -#define DBL_MIN_EXP __DBL_MIN_EXP__ -#define LDBL_MIN_EXP __LDBL_MIN_EXP__ - -#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ -#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ -#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ - -#define FLT_MAX_EXP __FLT_MAX_EXP__ -#define DBL_MAX_EXP __DBL_MAX_EXP__ -#define LDBL_MAX_EXP __LDBL_MAX_EXP__ - -#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ -#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ -#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ - -#define FLT_MAX __FLT_MAX__ -#define DBL_MAX __DBL_MAX__ -#define LDBL_MAX __LDBL_MAX__ - -#define FLT_EPSILON __FLT_EPSILON__ -#define DBL_EPSILON __DBL_EPSILON__ -#define LDBL_EPSILON __LDBL_EPSILON__ - -#define FLT_MIN __FLT_MIN__ -#define DBL_MIN __DBL_MIN__ -#define LDBL_MIN __LDBL_MIN__ - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ - !defined(__STRICT_ANSI__) || \ - (defined(__cplusplus) && __cplusplus >= 201703L) || \ - (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) -# define FLT_TRUE_MIN __FLT_DENORM_MIN__ -# define DBL_TRUE_MIN __DBL_DENORM_MIN__ -# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ -# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ -# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ -# define 
LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ -# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ -# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ -# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ -#endif - -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ - !defined(__STRICT_ANSI__) - /* C23 5.2.5.3.3p29-30 */ -# define INFINITY (__builtin_inff()) -# define NAN (__builtin_nanf("")) - /* C23 5.2.5.3.3p32 */ -# define FLT_NORM_MAX __FLT_NORM_MAX__ -# define DBL_NORM_MAX __DBL_NORM_MAX__ -# define LDBL_NORM_MAX __LDBL_NORM_MAX__ -#endif - -#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ -# define FLT16_MANT_DIG __FLT16_MANT_DIG__ -# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__ -# define FLT16_DIG __FLT16_DIG__ -# define FLT16_MIN_EXP __FLT16_MIN_EXP__ -# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__ -# define FLT16_MAX_EXP __FLT16_MAX_EXP__ -# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__ -# define FLT16_MAX __FLT16_MAX__ -# define FLT16_EPSILON __FLT16_EPSILON__ -# define FLT16_MIN __FLT16_MIN__ -# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__ -#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */ #endif /* __MVS__ */ -#endif /* __CLANG_FLOAT_H */ diff --git a/lib/include/fma4intrin.h b/lib/include/fma4intrin.h index 694801b3e8..20b8030b77 100644 --- a/lib/include/fma4intrin.h +++ b/lib/include/fma4intrin.h @@ -20,100 +20,100 @@ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define 
__DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], __C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], __C[0])); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, 
-(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], -__C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], -__C[0])); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmacc_ss(__m128 __A, 
__m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], __C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], __C[0])); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], 
-__C[0])); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 @@ -140,52 +140,52 @@ _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) -{ - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) -{ - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) -{ - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); +static __inline__ 
__m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) -{ - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) -{ - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 @@ -214,5 +214,7 @@ _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __FMA4INTRIN_H */ diff --git a/lib/include/fmaintrin.h b/lib/include/fmaintrin.h index 22d1a780bb..eba527f360 100644 --- a/lib/include/fmaintrin.h +++ b/lib/include/fmaintrin.h @@ -15,8 +15,20 @@ #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("fma"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("fma"), \ + __min_vector_width__(256))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif /// Computes a multiply-add of 128-bit vectors of [4 x float]. /// For each element, computes (__A * __B) + __C . @@ -32,10 +44,11 @@ /// \param __C /// A 128-bit vector of [4 x float] containing the addend. /// \returns A 128-bit vector of [4 x float] containing the result. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } /// Computes a multiply-add of 128-bit vectors of [2 x double]. @@ -52,10 +65,11 @@ _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit [2 x double] vector containing the result. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + (__v2df)__C); } /// Computes a scalar multiply-add of the single-precision values in the @@ -81,10 +95,10 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]); + return __A; } /// Computes a scalar multiply-add of the double-precision values in the @@ -110,10 +124,10 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]); + return __A; } /// Computes a multiply-subtract of 128-bit vectors of [4 x float]. @@ -130,10 +144,11 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. @@ -150,10 +165,11 @@ _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit vector of [2 x double] containing the result. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B, + -(__v2df)__C); } /// Computes a scalar multiply-subtract of the single-precision values in @@ -179,10 +195,10 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]); + return __A; } /// Computes a scalar multiply-subtract of the double-precision values in @@ -208,10 +224,10 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]); + return __A; } /// Computes a negated multiply-add of 128-bit vectors of [4 x float]. @@ -228,10 +244,11 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) /// \param __C /// A 128-bit vector of [4 x float] containing the addend. /// \returns A 128-bit [4 x float] vector containing the result. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. @@ -248,10 +265,11 @@ _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit vector of [2 x double] containing the result. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + (__v2df)__C); } /// Computes a scalar negated multiply-add of the single-precision values in @@ -277,10 +295,10 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]); + return __A; } /// Computes a scalar negated multiply-add of the double-precision values @@ -306,10 +324,10 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]); + return __A; } /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. @@ -326,10 +344,11 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); + return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, + -(__v4sf)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. @@ -346,10 +365,11 @@ _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); + return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, + -(__v2df)__C); } /// Computes a scalar negated multiply-subtract of the single-precision @@ -375,10 +395,10 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]); + return __A; } /// Computes a scalar negated multiply-subtract of the double-precision @@ -404,10 +424,10 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]); + return __A; } /// Computes a multiply with alternating add/subtract of 128-bit vectors of @@ -528,10 +548,11 @@ _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) /// \param __C /// A 256-bit vector of [8 x float] containing the addend. /// \returns A 256-bit vector of [8 x float] containing the result. 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } /// Computes a multiply-add of 256-bit vectors of [4 x double]. @@ -548,10 +569,11 @@ _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) /// \param __C /// A 256-bit vector of [4 x double] containing the addend. /// \returns A 256-bit vector of [4 x double] containing the result. -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + (__v4df)__C); } /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. @@ -568,10 +590,11 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) /// \param __C /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. @@ -588,10 +611,11 @@ _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) /// \param __C /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B, + -(__v4df)__C); } /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. @@ -608,10 +632,11 @@ _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) /// \param __C /// A 256-bit vector of [8 x float] containing the addend. /// \returns A 256-bit vector of [8 x float] containing the result. -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. @@ -628,10 +653,11 @@ _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) /// \param __C /// A 256-bit vector of [4 x double] containing the addend. /// \returns A 256-bit vector of [4 x double] containing the result. -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + (__v4df)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. @@ -648,10 +674,11 @@ _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) /// \param __C /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); + return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, + -(__v8sf)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. @@ -668,10 +695,11 @@ _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) /// \param __C /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); + return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, + -(__v4df)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of @@ -792,5 +820,7 @@ _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __FMAINTRIN_H */ diff --git a/lib/include/gfniintrin.h b/lib/include/gfniintrin.h index 9a5743d4b6..2c559f13c6 100644 --- a/lib/include/gfniintrin.h +++ b/lib/include/gfniintrin.h @@ -14,29 +14,36 @@ #ifndef __GFNIINTRIN_H #define __GFNIINTRIN_H -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) /* Default attributes for simple form (no masking). */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("gfni,no-evex512"), __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, __target__("gfni"), \ + __min_vector_width__(128))) constexpr /* Default attributes for YMM unmasked form. 
*/ #define __DEFAULT_FN_ATTRS_Y \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx,gfni,no-evex512"), \ - __min_vector_width__(256))) + __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), \ + __min_vector_width__(256))) constexpr /* Default attributes for VLX masked forms. */ #define __DEFAULT_FN_ATTRS_VL128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,avx512vl,gfni,no-evex512"), \ - __min_vector_width__(128))) + __target__("avx512bw,avx512vl,gfni"), \ + __min_vector_width__(128))) constexpr #define __DEFAULT_FN_ATTRS_VL256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,avx512vl,gfni,no-evex512"), \ - __min_vector_width__(256))) + __target__("avx512bw,avx512vl,gfni"), \ + __min_vector_width__(256))) constexpr + +/* Default attributes for ZMM unmasked forms. */ +#define __DEFAULT_FN_ATTRS_Z \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), \ + __min_vector_width__(512))) constexpr +/* Default attributes for ZMM masked forms. */ +#define __DEFAULT_FN_ATTRS_Z_MASK \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), \ + __min_vector_width__(512))) constexpr #else -/* Default attributes for simple form (no masking). */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("gfni"), \ __min_vector_width__(128))) @@ -55,18 +62,16 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512bw,avx512vl,gfni"), \ __min_vector_width__(256))) -#endif /* Default attributes for ZMM unmasked forms. */ #define __DEFAULT_FN_ATTRS_Z \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512f,evex512,gfni"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), \ __min_vector_width__(512))) /* Default attributes for ZMM masked forms. 
*/ #define __DEFAULT_FN_ATTRS_Z_MASK \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,evex512,gfni"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), \ __min_vector_width__(512))) +#endif #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ diff --git a/lib/include/hexagon_types.h b/lib/include/hexagon_types.h index 8e73fad4bc..54e8c1dd69 100644 --- a/lib/include/hexagon_types.h +++ b/lib/include/hexagon_types.h @@ -11,6 +11,11 @@ #include +// Save and undefine B0 to avoid conflicts with POSIX termios.h which +// defines B0 as a macro for baud rate 0. +#pragma push_macro("B0") +#undef B0 + /* Hexagon names */ #define HEXAGON_Vect HEXAGON_Vect64 #define HEXAGON_V_GET_D HEXAGON_V64_GET_D @@ -697,9 +702,8 @@ public: }; // Extract byte methods - signed char B0(void) { - return HEXAGON_V64_GET_B0(data); - }; + signed char b0(void) { return HEXAGON_V64_GET_B0(data); }; + signed char B0(void) { return b0(); }; signed char B1(void) { return HEXAGON_V64_GET_B1(data); }; @@ -776,9 +780,10 @@ public: }; // Set byte methods - HEXAGON_Vect64C B0(signed char b) { + HEXAGON_Vect64C b0(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B0(data, b)); }; + HEXAGON_Vect64C B0(signed char b) { return b0(b); }; HEXAGON_Vect64C B1(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B1(data, b)); }; @@ -1121,9 +1126,8 @@ public: }; // Extract byte methods - signed char B0(void) { - return HEXAGON_V32_GET_B0(data); - }; + signed char b0(void) { return HEXAGON_V32_GET_B0(data); }; + signed char B0(void) { return b0(); }; signed char B1(void) { return HEXAGON_V32_GET_B1(data); }; @@ -1162,9 +1166,10 @@ public: }; // Set byte methods - HEXAGON_Vect32C B0(signed char b) { + HEXAGON_Vect32C b0(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B0(data, b)); }; + HEXAGON_Vect32C B0(signed char b) { return b0(b); }; HEXAGON_Vect32C B1(signed char b) { 
return HEXAGON_Vect32C(HEXAGON_V32_PUT_B1(data, b)); }; @@ -1924,9 +1929,8 @@ public: }; // Extract byte methods - signed char B0(void) { - return Q6V64_GET_B0(data); - }; + signed char b0(void) { return Q6V64_GET_B0(data); }; + signed char B0(void) { return b0(); }; signed char B1(void) { return Q6V64_GET_B1(data); }; @@ -2003,9 +2007,8 @@ public: }; // Set byte methods - Q6Vect64C B0(signed char b) { - return Q6Vect64C(Q6V64_PUT_B0(data, b)); - }; + Q6Vect64C b0(signed char b) { return Q6Vect64C(Q6V64_PUT_B0(data, b)); }; + Q6Vect64C B0(signed char b) { return b0(b); }; Q6Vect64C B1(signed char b) { return Q6Vect64C(Q6V64_PUT_B1(data, b)); }; @@ -2348,9 +2351,8 @@ public: }; // Extract byte methods - signed char B0(void) { - return Q6V32_GET_B0(data); - }; + signed char b0(void) { return Q6V32_GET_B0(data); }; + signed char B0(void) { return b0(); }; signed char B1(void) { return Q6V32_GET_B1(data); }; @@ -2389,9 +2391,8 @@ public: }; // Set byte methods - Q6Vect32C B0(signed char b) { - return Q6Vect32C(Q6V32_PUT_B0(data, b)); - }; + Q6Vect32C b0(signed char b) { return Q6Vect32C(Q6V32_PUT_B0(data, b)); }; + Q6Vect32C B0(signed char b) { return b0(b); }; Q6Vect32C B1(signed char b) { return Q6Vect32C(Q6V32_PUT_B1(data, b)); }; @@ -2622,4 +2623,6 @@ typedef struct hexagon_udma_descriptor_type1_s unsigned int dstwidthoffset:16; } hexagon_udma_descriptor_type1_t; +#pragma pop_macro("B0") + #endif /* !HEXAGON_TYPES_H */ diff --git a/lib/include/hvx_hexagon_protos.h b/lib/include/hvx_hexagon_protos.h index fd120a589f..981fbd1a12 100644 --- a/lib/include/hvx_hexagon_protos.h +++ b/lib/include/hvx_hexagon_protos.h @@ -19,7 +19,6 @@ #define __BUILTIN_VECTOR_WRAP(a) a #endif -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rd32=vextract(Vu32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vextract_VR(HVX_Vector Vu, Word32 Rs) @@ -28,9 +27,7 @@ 
========================================================================== */ #define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=hi(Vss32) C Intrinsic Prototype: HVX_Vector Q6_V_hi_W(HVX_VectorPair Vss) @@ -39,9 +36,7 @@ ========================================================================== */ #define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=lo(Vss32) C Intrinsic Prototype: HVX_Vector Q6_V_lo_W(HVX_VectorPair Vss) @@ -50,9 +45,7 @@ ========================================================================== */ #define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vsplat_R(Word32 Rt) @@ -61,9 +54,7 @@ ========================================================================== */ #define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=and(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) @@ -72,9 +63,7 @@ ========================================================================== */ #define Q6_Q_and_QQ(Qs,Qt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=and(Qs4,!Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) @@ -83,9 +72,7 @@ ========================================================================== */ #define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=not(Qs4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_not_Q(HVX_VectorPred Qs) @@ -94,9 +81,7 @@ ========================================================================== */ #define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=or(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) @@ -105,9 +90,7 @@ ========================================================================== */ #define Q6_Q_or_QQ(Qs,Qt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=or(Qs4,!Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) @@ -116,9 +99,7 @@ ========================================================================== */ #define Q6_Q_or_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vsetq(Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq_R(Word32 Rt) @@ -127,9 +108,7 @@ ========================================================================== */ #define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=xor(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_xor_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) @@ -138,9 +117,7 @@ ========================================================================== */ #define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if 
__HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) vmem(Rt32+#s4)=Vs32 C Intrinsic Prototype: void Q6_vmem_QnRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) @@ -149,9 +126,7 @@ ========================================================================== */ #define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) vmem(Rt32+#s4):nt=Vs32 C Intrinsic Prototype: void Q6_vmem_QnRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) @@ -160,9 +135,7 @@ ========================================================================== */ #define Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) vmem(Rt32+#s4):nt=Vs32 C Intrinsic Prototype: void Q6_vmem_QRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) @@ -171,9 +144,7 @@ ========================================================================== */ #define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) vmem(Rt32+#s4)=Vs32 C Intrinsic Prototype: void Q6_vmem_QRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) @@ -182,9 +153,7 @@ ========================================================================== */ #define Q6_vmem_QRIV(Qv,Rt,Vs) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vabsdiff(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -193,9 +162,7 @@ ========================================================================== */ #define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vabsdiff_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -204,9 +171,7 @@ ========================================================================== */ #define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -215,9 +180,7 @@ ========================================================================== */ #define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vabsdiff(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vabsdiff_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -226,9 +189,7 @@ ========================================================================== */ #define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vabs(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh(HVX_Vector Vu) @@ -237,9 +198,7 @@ ========================================================================== */ #define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vabs(Vu32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh_sat(HVX_Vector Vu) @@ -248,9 +207,7 @@ ========================================================================== */ #define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vabs(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw(HVX_Vector Vu) @@ -259,9 +216,7 @@ ========================================================================== */ #define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vabs(Vu32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw_sat(HVX_Vector Vu) @@ -270,9 +225,7 @@ ========================================================================== */ #define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector 
Q6_Vb_vadd_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -281,9 +234,7 @@ ========================================================================== */ #define Q6_Vb_vadd_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -292,9 +243,7 @@ ========================================================================== */ #define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.b+=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -303,9 +252,7 @@ ========================================================================== */ #define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.b+=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -314,9 +261,7 @@ ========================================================================== */ #define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: 
HVX_Vector Q6_Vh_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -325,9 +270,7 @@ ========================================================================== */ #define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -336,9 +279,7 @@ ========================================================================== */ #define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.h+=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -347,9 +288,7 @@ ========================================================================== */ #define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.h+=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -358,9 +297,7 @@ ========================================================================== */ #define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h):sat C Intrinsic 
Prototype: HVX_Vector Q6_Vh_vadd_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -369,9 +306,7 @@ ========================================================================== */ #define Q6_Vh_vadd_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -380,9 +315,7 @@ ========================================================================== */ #define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -391,9 +324,7 @@ ========================================================================== */ #define Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -402,9 +333,7 @@ ========================================================================== */ #define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -413,9 +342,7 @@ 
========================================================================== */ #define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vadd_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -424,9 +351,7 @@ ========================================================================== */ #define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vadd_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -435,9 +360,7 @@ ========================================================================== */ #define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vadd_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -446,9 +369,7 @@ ========================================================================== */ #define Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -457,9 +378,7 @@ 
========================================================================== */ #define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -468,9 +387,7 @@ ========================================================================== */ #define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -479,9 +396,7 @@ ========================================================================== */ #define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.w+=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -490,9 +405,7 @@ ========================================================================== */ #define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.w+=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -501,9 +414,7 @@ 
========================================================================== */ #define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -512,9 +423,7 @@ ========================================================================== */ #define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -523,9 +432,7 @@ ========================================================================== */ #define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=valign(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -534,9 +441,7 @@ ========================================================================== */ #define Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=valign(Vu32,Vv32,#u3) C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) @@ -545,9 +450,7 @@ 
========================================================================== */ #define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vand(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -556,9 +459,7 @@ ========================================================================== */ #define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vand(Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QR(HVX_VectorPred Qu, Word32 Rt) @@ -567,9 +468,7 @@ ========================================================================== */ #define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32|=vand(Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt) @@ -578,9 +477,7 @@ ========================================================================== */ #define Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vand(Vu32,Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vand_VR(HVX_Vector Vu, Word32 Rt) @@ -589,9 +486,7 @@ ========================================================================== */ #define 
Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vand(Vu32,Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vandor_QVR(HVX_VectorPred Qx, HVX_Vector Vu, Word32 Rt) @@ -600,9 +495,7 @@ ========================================================================== */ #define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasl(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhR(HVX_Vector Vu, Word32 Rt) @@ -611,9 +504,7 @@ ========================================================================== */ #define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasl(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -622,9 +513,7 @@ ========================================================================== */ #define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasl(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwR(HVX_Vector Vu, Word32 Rt) @@ -633,9 +522,7 @@ ========================================================================== */ #define 
Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vasl(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vaslacc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -644,9 +531,7 @@ ========================================================================== */ #define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasl(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -655,9 +540,7 @@ ========================================================================== */ #define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhR(HVX_Vector Vu, Word32 Rt) @@ -666,9 +549,7 @@ ========================================================================== */ #define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -677,9 +558,7 @@ ========================================================================== */ #define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if 
__HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -688,9 +567,7 @@ ========================================================================== */ #define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -699,9 +576,7 @@ ========================================================================== */ #define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -710,9 +585,7 @@ ========================================================================== */ #define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasr(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwR(HVX_Vector Vu, Word32 Rt) @@ -721,9 +594,7 @@ ========================================================================== */ #define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly 
Syntax: Vx32.w+=vasr(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasracc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -732,9 +603,7 @@ ========================================================================== */ #define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -743,9 +612,7 @@ ========================================================================== */ #define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -754,9 +621,7 @@ ========================================================================== */ #define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -765,9 +630,7 @@ ========================================================================== */ #define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat C 
Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -776,9 +639,7 @@ ========================================================================== */ #define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasr(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -787,9 +648,7 @@ ========================================================================== */ #define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=Vu32 C Intrinsic Prototype: HVX_Vector Q6_V_equals_V(HVX_Vector Vu) @@ -798,9 +657,7 @@ ========================================================================== */ #define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=Vuu32 C Intrinsic Prototype: HVX_VectorPair Q6_W_equals_W(HVX_VectorPair Vuu) @@ -809,9 +666,7 @@ ========================================================================== */ #define Q6_W_equals_W(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -820,9 +675,7 @@ ========================================================================== */ #define Q6_Vh_vavg_VhVh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h):rnd C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh_rnd(HVX_Vector Vu, HVX_Vector Vv) @@ -831,9 +684,7 @@ ========================================================================== */ #define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -842,9 +693,7 @@ ========================================================================== */ #define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub_rnd(HVX_Vector Vu, HVX_Vector Vv) @@ -853,9 +702,7 @@ ========================================================================== */ #define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -864,9 +711,7 @@ ========================================================================== */ #define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh_rnd(HVX_Vector Vu, HVX_Vector Vv) @@ -875,9 +720,7 @@ ========================================================================== */ #define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -886,9 +729,7 @@ ========================================================================== */ #define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w):rnd C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw_rnd(HVX_Vector Vu, HVX_Vector Vv) @@ -897,9 +738,7 @@ ========================================================================== */ #define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vcl0(Vu32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcl0_Vuh(HVX_Vector Vu) @@ -908,9 +747,7 @@ ========================================================================== */ #define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vcl0(Vu32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vcl0_Vuw(HVX_Vector Vu) @@ 
-919,9 +756,7 @@ ========================================================================== */ #define Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vcombine(Vu32,Vv32) C Intrinsic Prototype: HVX_VectorPair Q6_W_vcombine_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -930,9 +765,7 @@ ========================================================================== */ #define Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=#0 C Intrinsic Prototype: HVX_Vector Q6_V_vzero() @@ -941,9 +774,7 @@ ========================================================================== */ #define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)() -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vdeal(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeal_Vb(HVX_Vector Vu) @@ -952,9 +783,7 @@ ========================================================================== */ #define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vdeale(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeale_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -963,9 +792,7 @@ ========================================================================== */ #define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32.h=vdeal(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdeal_Vh(HVX_Vector Vu) @@ -974,9 +801,7 @@ ========================================================================== */ #define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vdeal(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_W_vdeal_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -985,9 +810,7 @@ ========================================================================== */ #define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vdelta(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vdelta_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -996,9 +819,7 @@ ========================================================================== */ #define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vdmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpy_VubRb(HVX_Vector Vu, Word32 Rt) @@ -1007,9 +828,7 @@ ========================================================================== */ #define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vdmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpyacc_VhVubRb(HVX_Vector Vx, HVX_Vector 
Vu, Word32 Rt) @@ -1018,9 +837,7 @@ ========================================================================== */ #define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vdmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) @@ -1029,9 +846,7 @@ ========================================================================== */ #define Q6_Wh_vdmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -1040,9 +855,7 @@ ========================================================================== */ #define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRb(HVX_Vector Vu, Word32 Rt) @@ -1051,9 +864,7 @@ ========================================================================== */ #define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -1062,9 +873,7 @@ 
========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vdmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) @@ -1073,9 +882,7 @@ ========================================================================== */ #define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vdmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -1084,9 +891,7 @@ ========================================================================== */ #define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRh_sat(HVX_VectorPair Vuu, Word32 Rt) @@ -1095,9 +900,7 @@ ========================================================================== */ #define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) @@ -1106,9 +909,7 @@ 
========================================================================== */ #define Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt) @@ -1117,9 +918,7 @@ ========================================================================== */ #define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -1128,9 +927,7 @@ ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRuh_sat(HVX_VectorPair Vuu, Word32 Rt) @@ -1139,9 +936,7 @@ ========================================================================== */ #define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRuh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) @@ -1150,9 +945,7 @@ 
========================================================================== */ #define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt) @@ -1161,9 +954,7 @@ ========================================================================== */ #define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -1172,9 +963,7 @@ ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -1183,9 +972,7 @@ ========================================================================== */ #define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhVh_sat(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -1194,9 +981,7 @@ 
========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsad_WuhRuh(HVX_VectorPair Vuu, Word32 Rt) @@ -1205,9 +990,7 @@ ========================================================================== */ #define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsadacc_WuwWuhRuh(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -1216,9 +999,7 @@ ========================================================================== */ #define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -1227,9 +1008,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ 
-1238,9 +1017,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1249,9 +1026,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1260,9 +1035,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -1271,9 +1044,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eq_VhVh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1282,9 +1053,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1293,9 +1062,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1304,9 +1071,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if 
__HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -1315,9 +1080,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eq_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1326,9 +1089,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1337,9 +1098,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ 
-1348,9 +1107,7 @@ ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -1359,9 +1116,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1370,9 +1125,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1381,9 +1134,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1392,9 +1143,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -1403,9 +1152,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1414,9 +1161,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1425,9 +1170,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1436,9 +1179,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -1447,9 +1188,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1458,9 
+1197,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1469,9 +1206,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1480,9 +1215,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -1491,9 +1224,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1502,9 +1233,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1513,9 +1242,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1524,9 +1251,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* 
__HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) @@ -1535,9 +1260,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1546,9 +1269,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1557,9 +1278,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred 
Q6_Q_vcmp_gtxacc_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1568,9 +1287,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -1579,9 +1296,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1590,9 +1305,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1601,9 +1314,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) @@ -1612,9 +1323,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w=vinsert(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vinsert_VwR(HVX_Vector Vx, Word32 Rt) @@ -1623,9 +1332,7 @@ ========================================================================== */ #define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vlalign(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -1634,9 +1341,7 @@ ========================================================================== */ #define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vlalign(Vu32,Vv32,#u3) C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) @@ 
-1645,9 +1350,7 @@ ========================================================================== */ #define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vlsr(Vu32.uh,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vlsr_VuhR(HVX_Vector Vu, Word32 Rt) @@ -1656,9 +1359,7 @@ ========================================================================== */ #define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vlsr(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vlsr_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -1667,9 +1368,7 @@ ========================================================================== */ #define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vlsr(Vu32.uw,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vlsr_VuwR(HVX_Vector Vu, Word32 Rt) @@ -1678,9 +1377,7 @@ ========================================================================== */ #define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vlsr(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vlsr_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -1689,9 +1386,7 @@ ========================================================================== */ #define Q6_Vw_vlsr_VwVw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -1700,9 +1395,7 @@ ========================================================================== */ #define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbR(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -1711,9 +1404,7 @@ ========================================================================== */ #define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -1722,9 +1413,7 @@ ========================================================================== */ #define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhR(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -1733,9 +1422,7 @@ ========================================================================== */ #define 
Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmax(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmax_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -1744,9 +1431,7 @@ ========================================================================== */ #define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vmax(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vmax_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -1755,9 +1440,7 @@ ========================================================================== */ #define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vmax(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmax_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -1766,9 +1449,7 @@ ========================================================================== */ #define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmax(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmax_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -1777,9 +1458,7 @@ ========================================================================== */ #define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 
/* ========================================================================== Assembly Syntax: Vd32.h=vmin(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmin_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -1788,9 +1467,7 @@ ========================================================================== */ #define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vmin(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vmin_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -1799,9 +1476,7 @@ ========================================================================== */ #define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vmin(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmin_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -1810,9 +1485,7 @@ ========================================================================== */ #define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmin(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmin_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -1821,9 +1494,7 @@ ========================================================================== */ #define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wh_vmpa_WubRb(HVX_VectorPair Vuu, Word32 Rt) @@ -1832,9 +1503,7 @@ ========================================================================== */ #define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -1843,9 +1512,7 @@ ========================================================================== */ #define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -1854,9 +1521,7 @@ ========================================================================== */ #define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWub(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -1865,9 +1530,7 @@ ========================================================================== */ #define Q6_Wh_vmpa_WubWub(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpa(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WhRb(HVX_VectorPair Vuu, Word32 Rt) @@ -1876,9 
+1539,7 @@ ========================================================================== */ #define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpa(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -1887,9 +1548,7 @@ ========================================================================== */ #define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubRb(HVX_Vector Vu, Word32 Rt) @@ -1898,9 +1557,7 @@ ========================================================================== */ #define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubRb(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) @@ -1909,9 +1566,7 @@ ========================================================================== */ #define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) @@ -1920,9 +1575,7 @@ 
========================================================================== */ #define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -1931,9 +1584,7 @@ ========================================================================== */ #define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -1942,9 +1593,7 @@ ========================================================================== */ #define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVbVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -1953,9 +1602,7 @@ ========================================================================== */ #define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpye(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -1964,9 +1611,7 @@ ========================================================================== 
*/ #define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Rt32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhRh(HVX_Vector Vu, Word32 Rt) @@ -1975,9 +1620,7 @@ ========================================================================== */ #define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh_sat(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) @@ -1986,9 +1629,7 @@ ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt) @@ -1997,9 +1638,7 @@ ========================================================================== */ #define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt) @@ -2008,9 +1647,7 @@ ========================================================================== */ #define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -2019,9 +1656,7 @@ ========================================================================== */ #define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -2030,9 +1665,7 @@ ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2041,9 +1674,7 @@ ========================================================================== */ #define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -2052,9 +1683,7 @@ ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv) -#endif /* 
__HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2063,9 +1692,7 @@ ========================================================================== */ #define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyieo(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieo_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2074,9 +1701,7 @@ ========================================================================== */ #define Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2085,9 +1710,7 @@ ========================================================================== */ #define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyie(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyie_VwVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -2096,9 +1719,7 @@ ========================================================================== */ #define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVuh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2107,9 +1728,7 @@ ========================================================================== */ #define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2118,9 +1737,7 @@ ========================================================================== */ #define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2129,9 +1746,7 @@ ========================================================================== */ #define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhRb(HVX_Vector Vu, Word32 Rt) @@ -2140,9 +1755,7 @@ ========================================================================== */ #define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vx32.h+=vmpyi(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -2151,9 +1764,7 @@ ========================================================================== */ #define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyio(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyio_VwVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2162,9 +1773,7 @@ ========================================================================== */ #define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRb(HVX_Vector Vu, Word32 Rt) @@ -2173,9 +1782,7 @@ ========================================================================== */ #define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -2184,9 +1791,7 @@ ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRh(HVX_Vector Vu, Word32 Rt) @@ 
-2195,9 +1800,7 @@ ========================================================================== */ #define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -2206,9 +1809,7 @@ ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2217,9 +1818,7 @@ ========================================================================== */ #define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2228,9 +1827,7 @@ ========================================================================== */ #define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2239,9 +1836,7 @@ 
========================================================================== */ #define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2250,9 +1845,7 @@ ========================================================================== */ #define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubRub(HVX_Vector Vu, Word32 Rt) @@ -2261,9 +1854,7 @@ ========================================================================== */ #define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubRub(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) @@ -2272,9 +1863,7 @@ ========================================================================== */ #define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -2283,9 
+1872,7 @@ ========================================================================== */ #define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -2294,9 +1881,7 @@ ========================================================================== */ #define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhRuh(HVX_Vector Vu, Word32 Rt) @@ -2305,9 +1890,7 @@ ========================================================================== */ #define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhRuh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) @@ -2316,9 +1899,7 @@ ========================================================================== */ #define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -2327,9 +1908,7 @@ 
========================================================================== */ #define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -2338,9 +1917,7 @@ ========================================================================== */ #define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vmux(Qt4,Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vmux_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) @@ -2349,9 +1926,7 @@ ========================================================================== */ #define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vnavg(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vnavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2360,9 +1935,7 @@ ========================================================================== */ #define Q6_Vh_vnavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vnavg(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -2371,9 +1944,7 @@ 
========================================================================== */ #define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vnavg(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vnavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -2382,9 +1953,7 @@ ========================================================================== */ #define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vnormamt(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vnormamt_Vh(HVX_Vector Vu) @@ -2393,9 +1962,7 @@ ========================================================================== */ #define Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vnormamt(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vnormamt_Vw(HVX_Vector Vu) @@ -2404,9 +1971,7 @@ ========================================================================== */ #define Q6_Vw_vnormamt_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vnot(Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vnot_V(HVX_Vector Vu) @@ -2415,9 +1980,7 @@ ========================================================================== */ #define Q6_V_vnot_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32=vor(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vor_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -2426,9 +1989,7 @@ ========================================================================== */ #define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpacke(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacke_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2437,9 +1998,7 @@ ========================================================================== */ #define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpacke(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacke_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -2448,9 +2007,7 @@ ========================================================================== */ #define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpack(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2459,9 +2016,7 @@ ========================================================================== */ #define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vpack(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector 
Q6_Vub_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2470,9 +2025,7 @@ ========================================================================== */ #define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpacko(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacko_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2481,9 +2034,7 @@ ========================================================================== */ #define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpacko(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacko_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -2492,9 +2043,7 @@ ========================================================================== */ #define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpack(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2503,9 +2052,7 @@ ========================================================================== */ #define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vpack(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2514,9 +2061,7 @@ 
========================================================================== */ #define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpopcount(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpopcount_Vh(HVX_Vector Vu) @@ -2525,9 +2070,7 @@ ========================================================================== */ #define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vrdelta(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vrdelta_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -2536,9 +2079,7 @@ ========================================================================== */ #define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubRb(HVX_Vector Vu, Word32 Rt) @@ -2547,9 +2088,7 @@ ========================================================================== */ #define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -2558,9 +2097,7 @@ ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpy_WubRbI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) @@ -2569,9 +2106,7 @@ ========================================================================== */ #define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpyacc_WwWubRbI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) @@ -2580,9 +2115,7 @@ ========================================================================== */ #define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) @@ -2591,9 +2124,7 @@ ========================================================================== */ #define Q6_Vw_vrmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2602,9 +2133,7 @@ ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -2613,9 +2142,7 @@ ========================================================================== */ #define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2624,9 +2151,7 @@ ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubRub(HVX_Vector Vu, Word32 Rt) @@ -2635,9 +2160,7 @@ ========================================================================== */ #define Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) @@ -2646,9 +2169,7 @@ ========================================================================== */ #define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt) -#endif /* 
__HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpy_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) @@ -2657,9 +2178,7 @@ ========================================================================== */ #define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpyacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) @@ -2668,9 +2187,7 @@ ========================================================================== */ #define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -2679,9 +2196,7 @@ ========================================================================== */ #define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -2690,9 +2205,7 @@ ========================================================================== */ #define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vror(Vu32,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vror_VR(HVX_Vector Vu, Word32 Rt) @@ -2701,9 +2214,7 @@ ========================================================================== */ #define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vround(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2712,9 +2223,7 @@ ========================================================================== */ #define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vround(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2723,9 +2232,7 @@ ========================================================================== */ #define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vround(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2734,9 +2241,7 @@ ========================================================================== */ #define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32.uh=vround(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -2745,9 +2250,7 @@ ========================================================================== */ #define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsad_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) @@ -2756,9 +2259,7 @@ ========================================================================== */ #define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsadacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) @@ -2767,9 +2268,7 @@ ========================================================================== */ #define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vsat(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vub_vsat_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2778,9 +2277,7 @@ ========================================================================== */ #define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vd32.h=vsat(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsat_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -2789,9 +2286,7 @@ ========================================================================== */ #define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsxt(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsxt_Vb(HVX_Vector Vu) @@ -2800,9 +2295,7 @@ ========================================================================== */ #define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsxt(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsxt_Vh(HVX_Vector Vu) @@ -2811,9 +2304,7 @@ ========================================================================== */ #define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuffe(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffe_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2822,9 +2313,7 @@ ========================================================================== */ #define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuff(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuff_Vb(HVX_Vector Vu) @@ -2833,9 +2322,7 @@ 
========================================================================== */ #define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuffe(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffe_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -2844,9 +2331,7 @@ ========================================================================== */ #define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuff(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuff_Vh(HVX_Vector Vu) @@ -2855,9 +2340,7 @@ ========================================================================== */ #define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuffo(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffo_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -2866,9 +2349,7 @@ ========================================================================== */ #define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vshuff(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_W_vshuff_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) @@ -2877,9 +2358,7 @@ ========================================================================== */ #define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt) 
-#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vshuffoe(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vshuffoe_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -2888,9 +2367,7 @@ ========================================================================== */ #define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vshuffoe(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vshuffoe_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2899,9 +2376,7 @@ ========================================================================== */ #define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuffo(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffo_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2910,9 +2385,7 @@ ========================================================================== */ #define Q6_Vh_vshuffo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb(HVX_Vector Vu, HVX_Vector Vv) @@ -2921,9 +2394,7 @@ ========================================================================== */ #define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== 
Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -2932,9 +2403,7 @@ ========================================================================== */ #define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.b-=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -2943,9 +2412,7 @@ ========================================================================== */ #define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.b-=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -2954,9 +2421,7 @@ ========================================================================== */ #define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -2965,9 +2430,7 @@ ========================================================================== */ #define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -2976,9 +2439,7 @@ ========================================================================== */ #define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.h-=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -2987,9 +2448,7 @@ ========================================================================== */ #define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.h-=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -2998,9 +2457,7 @@ ========================================================================== */ #define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -3009,9 +2466,7 @@ ========================================================================== */ #define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 
*/ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -3020,9 +2475,7 @@ ========================================================================== */ #define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) @@ -3031,9 +2484,7 @@ ========================================================================== */ #define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_VubVub(HVX_Vector Vu, HVX_Vector Vv) @@ -3042,9 +2493,7 @@ ========================================================================== */ #define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -3053,9 +2502,7 @@ ========================================================================== */ #define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== 
Assembly Syntax: Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vsub_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -3064,9 +2511,7 @@ ========================================================================== */ #define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsub_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -3075,9 +2520,7 @@ ========================================================================== */ #define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vsub_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -3086,9 +2529,7 @@ ========================================================================== */ #define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) @@ -3097,9 +2538,7 @@ ========================================================================== */ #define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w) C Intrinsic 
Prototype: HVX_Vector Q6_Vw_vsub_VwVw(HVX_Vector Vu, HVX_Vector Vv) @@ -3108,9 +2547,7 @@ ========================================================================== */ #define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -3119,9 +2556,7 @@ ========================================================================== */ #define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.w-=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -3130,9 +2565,7 @@ ========================================================================== */ #define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.w-=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) @@ -3141,9 +2574,7 @@ ========================================================================== */ #define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vd32.w=vsub(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) @@ -3152,9 +2583,7 @@ ========================================================================== */ #define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) @@ -3163,9 +2592,7 @@ ========================================================================== */ #define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vswap(Qt4,Vu32,Vv32) C Intrinsic Prototype: HVX_VectorPair Q6_W_vswap_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) @@ -3174,9 +2601,7 @@ ========================================================================== */ #define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vtmpy(Vuu32.b,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WbRb(HVX_VectorPair Vuu, Word32 Rt) @@ -3185,9 +2610,7 @@ ========================================================================== */ #define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.b,Rt32.b) C 
Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWbRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -3196,9 +2619,7 @@ ========================================================================== */ #define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vtmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) @@ -3207,9 +2628,7 @@ ========================================================================== */ #define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -3218,9 +2637,7 @@ ========================================================================== */ #define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vtmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) @@ -3229,9 +2646,7 @@ ========================================================================== */ #define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vtmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair 
Q6_Ww_vtmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) @@ -3240,9 +2655,7 @@ ========================================================================== */ #define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vunpack(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpack_Vb(HVX_Vector Vu) @@ -3251,9 +2664,7 @@ ========================================================================== */ #define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vunpack(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpack_Vh(HVX_Vector Vu) @@ -3262,9 +2673,7 @@ ========================================================================== */ #define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h|=vunpacko(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpackoor_WhVb(HVX_VectorPair Vxx, HVX_Vector Vu) @@ -3273,9 +2682,7 @@ ========================================================================== */ #define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w|=vunpacko(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpackoor_WwVh(HVX_VectorPair Vxx, HVX_Vector Vu) @@ -3284,9 +2691,7 @@ 
========================================================================== */ #define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vunpack(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vunpack_Vub(HVX_Vector Vu) @@ -3295,9 +2700,7 @@ ========================================================================== */ #define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vunpack(Vu32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vunpack_Vuh(HVX_Vector Vu) @@ -3306,9 +2709,7 @@ ========================================================================== */ #define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vxor(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vxor_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -3317,9 +2718,7 @@ ========================================================================== */ #define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vzxt(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vzxt_Vub(HVX_Vector Vu) @@ -3328,9 +2727,7 @@ ========================================================================== */ #define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ -#if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vdd32.uw=vzxt(Vu32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vzxt_Vuh(HVX_Vector Vu) @@ -3339,7 +2736,6 @@ ========================================================================== */ #define Q6_Wuw_vzxt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3350,7 +2746,7 @@ ========================================================================== */ #define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3361,7 +2757,7 @@ ========================================================================== */ #define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3372,7 +2768,7 @@ ========================================================================== */ #define Q6_Q_vsetq2_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3383,7 +2779,7 @@ ========================================================================== */ #define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* 
========================================================================== @@ -3394,7 +2790,7 @@ ========================================================================== */ #define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3405,7 +2801,7 @@ ========================================================================== */ #define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3416,7 +2812,7 @@ ========================================================================== */ #define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3427,7 +2823,7 @@ ========================================================================== */ #define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3438,7 +2834,7 @@ ========================================================================== */ #define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3449,7 +2845,7 @@ 
========================================================================== */ #define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3460,7 +2856,7 @@ ========================================================================== */ #define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3471,7 +2867,7 @@ ========================================================================== */ #define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3482,7 +2878,7 @@ ========================================================================== */ #define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3493,7 +2889,7 @@ ========================================================================== */ #define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3504,7 +2900,7 @@ ========================================================================== */ #define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 
62 /* ========================================================================== @@ -3515,7 +2911,7 @@ ========================================================================== */ #define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3526,7 +2922,7 @@ ========================================================================== */ #define Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3537,7 +2933,7 @@ ========================================================================== */ #define Q6_V_vandor_VQnR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3548,7 +2944,7 @@ ========================================================================== */ #define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3559,7 +2955,7 @@ ========================================================================== */ #define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* 
========================================================================== @@ -3570,7 +2966,7 @@ ========================================================================== */ #define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3581,7 +2977,7 @@ ========================================================================== */ #define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3592,7 +2988,7 @@ ========================================================================== */ #define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3603,7 +2999,7 @@ ========================================================================== */ #define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3614,7 +3010,7 @@ ========================================================================== */ #define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3625,7 +3021,7 @@ ========================================================================== */ #define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3636,7 +3032,7 @@ ========================================================================== */ #define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3647,7 +3043,7 @@ ========================================================================== */ #define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3658,7 +3054,7 @@ ========================================================================== */ #define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3669,7 +3065,7 @@ ========================================================================== */ #define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3680,7 +3076,7 @@ ========================================================================== */ #define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3691,7 +3087,7 @@ 
========================================================================== */ #define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3702,7 +3098,7 @@ ========================================================================== */ #define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3713,7 +3109,7 @@ ========================================================================== */ #define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3724,7 +3120,7 @@ ========================================================================== */ #define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3735,7 +3131,7 @@ ========================================================================== */ #define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3746,7 +3142,7 @@ ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* 
========================================================================== @@ -3757,7 +3153,7 @@ ========================================================================== */ #define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3768,7 +3164,7 @@ ========================================================================== */ #define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3779,7 +3175,7 @@ ========================================================================== */ #define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3790,7 +3186,7 @@ ========================================================================== */ #define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3801,7 +3197,7 @@ ========================================================================== */ #define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3812,7 +3208,7 @@ ========================================================================== */ #define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3823,7 +3219,7 @@ ========================================================================== */ #define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3834,7 +3230,7 @@ ========================================================================== */ #define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3845,7 +3241,7 @@ ========================================================================== */ #define Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 62 /* ========================================================================== @@ -3856,7 +3252,7 @@ ========================================================================== */ #define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 62 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3867,7 +3263,7 @@ ========================================================================== */ #define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3878,7 +3274,7 @@ 
========================================================================== */ #define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3889,7 +3285,7 @@ ========================================================================== */ #define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3900,7 +3296,7 @@ ========================================================================== */ #define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3911,7 +3307,7 @@ ========================================================================== */ #define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3922,7 +3318,7 @@ ========================================================================== */ #define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3933,7 +3329,7 @@ ========================================================================== */ #define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ 
>= 65 /* ========================================================================== @@ -3944,7 +3340,7 @@ ========================================================================== */ #define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3955,7 +3351,7 @@ ========================================================================== */ #define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3966,7 +3362,7 @@ ========================================================================== */ #define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3977,7 +3373,7 @@ ========================================================================== */ #define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3988,7 +3384,7 @@ ========================================================================== */ #define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)() -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -3999,7 +3395,7 @@ ========================================================================== */ #define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv) -#endif /* __HEXAGON_ARCH___ 
>= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4010,7 +3406,7 @@ ========================================================================== */ #define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4021,7 +3417,7 @@ ========================================================================== */ #define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4032,7 +3428,7 @@ ========================================================================== */ #define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4043,7 +3439,7 @@ ========================================================================== */ #define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4054,7 +3450,7 @@ ========================================================================== */ #define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ 
>= 65 /* ========================================================================== @@ -4065,7 +3461,7 @@ ========================================================================== */ #define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4076,7 +3472,7 @@ ========================================================================== */ #define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4087,7 +3483,7 @@ ========================================================================== */ #define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4098,7 +3494,7 @@ ========================================================================== */ #define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4109,7 +3505,7 @@ ========================================================================== */ #define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4120,7 +3516,7 @@ ========================================================================== */ #define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4131,7 +3527,7 @@ ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4142,7 +3538,7 @@ ========================================================================== */ #define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4153,7 +3549,7 @@ ========================================================================== */ #define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4164,7 +3560,7 @@ ========================================================================== */ #define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4175,7 +3571,7 @@ ========================================================================== */ #define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ 
-4186,7 +3582,7 @@ ========================================================================== */ #define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4197,7 +3593,7 @@ ========================================================================== */ #define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4208,7 +3604,7 @@ ========================================================================== */ #define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4219,7 +3615,7 @@ ========================================================================== */ #define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4230,7 +3626,7 @@ ========================================================================== */ #define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4241,7 +3637,7 @@ 
========================================================================== */ #define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4252,7 +3648,7 @@ ========================================================================== */ #define Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4263,7 +3659,7 @@ ========================================================================== */ #define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4274,7 +3670,7 @@ ========================================================================== */ #define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4285,7 +3681,7 @@ ========================================================================== */ #define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 65 /* ========================================================================== @@ -4296,7 +3692,7 @@ ========================================================================== */ #define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) -#endif /* __HEXAGON_ARCH___ >= 65 */ +#endif #if __HVX_ARCH__ >= 66 /* ========================================================================== @@ -4307,7 +3703,7 @@ ========================================================================== */ #define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1)) -#endif /* __HEXAGON_ARCH___ >= 66 */ +#endif #if __HVX_ARCH__ >= 66 /* ========================================================================== @@ -4318,7 +3714,7 @@ ========================================================================== */ #define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 66 */ +#endif #if __HVX_ARCH__ >= 66 /* ========================================================================== @@ -4329,7 +3725,7 @@ ========================================================================== */ #define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 66 */ +#endif #if __HVX_ARCH__ >= 66 /* ========================================================================== @@ -4340,7 +3736,7 @@ ========================================================================== */ #define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 66 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4351,7 +3747,7 @@ ========================================================================== */ #define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ 
>= 68 /* ========================================================================== @@ -4362,7 +3758,7 @@ ========================================================================== */ #define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4373,7 +3769,7 @@ ========================================================================== */ #define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4384,9 +3780,9 @@ ========================================================================== */ #define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vabs(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu) @@ -4395,9 +3791,9 @@ ========================================================================== */ #define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vabs(Vu32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu) @@ -4406,7 +3802,7 @@ ========================================================================== */ #define Q6_Vsf_vabs_Vsf(Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4417,9 +3813,9 @@ ========================================================================== */ #define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4428,7 +3824,7 @@ ========================================================================== */ #define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4439,7 +3835,7 @@ ========================================================================== */ #define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4450,7 +3846,7 @@ ========================================================================== */ #define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4461,7 +3857,7 @@ ========================================================================== */ #define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ 
>= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4472,7 +3868,7 @@ ========================================================================== */ #define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4483,9 +3879,9 @@ ========================================================================== */ #define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4494,9 +3890,9 @@ ========================================================================== */ #define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vadd(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) @@ -4505,9 +3901,9 @@ ========================================================================== */ #define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.w=vfmv(Vu32.w) C Intrinsic Prototype: HVX_Vector 
Q6_Vw_vfmv_Vw(HVX_Vector Vu) @@ -4516,7 +3912,7 @@ ========================================================================== */ #define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4527,7 +3923,7 @@ ========================================================================== */ #define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4538,7 +3934,7 @@ ========================================================================== */ #define Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4549,9 +3945,9 @@ ========================================================================== */ #define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.b=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4560,9 +3956,9 @@ ========================================================================== */ #define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.h=vcvt(Vu32.hf) C 
Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu) @@ -4571,9 +3967,9 @@ ========================================================================== */ #define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu) @@ -4582,9 +3978,9 @@ ========================================================================== */ #define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu) @@ -4593,9 +3989,9 @@ ========================================================================== */ #define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) @@ -4604,9 +4000,9 @@ ========================================================================== */ #define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt(Vu32.ub) C Intrinsic 
Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu) @@ -4615,9 +4011,9 @@ ========================================================================== */ #define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu) @@ -4626,9 +4022,9 @@ ========================================================================== */ #define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vcvt(Vu32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu) @@ -4637,9 +4033,9 @@ ========================================================================== */ #define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.ub=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4648,9 +4044,9 @@ ========================================================================== */ #define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.uh=vcvt(Vu32.hf) C 
Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu) @@ -4659,9 +4055,9 @@ ========================================================================== */ #define Q6_Vuh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vdmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4670,9 +4066,9 @@ ========================================================================== */ #define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -4681,9 +4077,9 @@ ========================================================================== */ #define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vfmax(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4692,9 +4088,9 @@ ========================================================================== */ #define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* 
========================================================================== Assembly Syntax: Vd32.sf=vfmax(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) @@ -4703,9 +4099,9 @@ ========================================================================== */ #define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vfmin(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4714,9 +4110,9 @@ ========================================================================== */ #define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vfmin(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) @@ -4725,9 +4121,9 @@ ========================================================================== */ #define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vfneg(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu) @@ -4736,9 +4132,9 @@ ========================================================================== */ #define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if 
__HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vfneg(Vu32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu) @@ -4747,7 +4143,7 @@ ========================================================================== */ #define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4758,7 +4154,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4769,7 +4165,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4780,7 +4176,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4791,7 +4187,7 @@ 
========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4802,7 +4198,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4813,7 +4209,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4824,7 +4220,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4835,7 +4231,7 @@ ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4846,7 +4242,7 @@ ========================================================================== */ #define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4857,7 +4253,7 @@ ========================================================================== */ #define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4868,7 +4264,7 @@ ========================================================================== */ #define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4879,9 +4275,9 @@ ========================================================================== */ #define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -4890,9 +4286,9 @@ ========================================================================== */ #define 
Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vx32.hf+=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) @@ -4901,7 +4297,7 @@ ========================================================================== */ #define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4912,7 +4308,7 @@ ========================================================================== */ #define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4923,7 +4319,7 @@ ========================================================================== */ #define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4934,7 +4330,7 @@ ========================================================================== */ #define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4945,7 +4341,7 @@ ========================================================================== */ #define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4956,7 +4352,7 @@ ========================================================================== */ #define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4967,7 +4363,7 @@ ========================================================================== */ #define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4978,7 +4374,7 @@ ========================================================================== */ #define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -4989,9 +4385,9 @@ ========================================================================== */ #define Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -5000,9 +4396,9 @@ ========================================================================== */ #define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv) -#endif 
/* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) @@ -5011,9 +4407,9 @@ ========================================================================== */ #define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vmpy(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) @@ -5022,7 +4418,7 @@ ========================================================================== */ #define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -5033,9 +4429,9 @@ ========================================================================== */ #define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.hf=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -5044,7 +4440,7 @@ ========================================================================== */ #define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ 
+#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -5055,7 +4451,7 @@ ========================================================================== */ #define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -5066,7 +4462,7 @@ ========================================================================== */ #define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -5077,7 +4473,7 @@ ========================================================================== */ #define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -5088,7 +4484,7 @@ ========================================================================== */ #define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 68 /* ========================================================================== @@ -5099,9 +4495,9 @@ ========================================================================== */ #define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) @@ -5110,9 +4506,9 @@ ========================================================================== */ #define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif -#if __HVX_ARCH__ >= 68 +#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.sf=vsub(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) @@ -5121,7 +4517,7 @@ ========================================================================== */ #define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 68 */ +#endif #if __HVX_ARCH__ >= 69 /* ========================================================================== @@ -5132,7 +4528,7 @@ ========================================================================== */ #define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv) -#endif /* __HEXAGON_ARCH___ >= 69 */ +#endif #if __HVX_ARCH__ >= 69 /* ========================================================================== @@ -5143,7 +4539,7 @@ ========================================================================== */ #define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv) -#endif /* __HEXAGON_ARCH___ >= 69 */ +#endif #if __HVX_ARCH__ >= 69 /* ========================================================================== @@ -5154,7 +4550,7 @@ ========================================================================== */ #define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv) -#endif /* __HEXAGON_ARCH___ >= 69 */ +#endif #if __HVX_ARCH__ >= 69 /* ========================================================================== @@ -5165,7 
+4561,7 @@ ========================================================================== */ #define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv) -#endif /* __HEXAGON_ARCH___ >= 69 */ +#endif #if __HVX_ARCH__ >= 69 /* ========================================================================== @@ -5176,9 +4572,9 @@ ========================================================================== */ #define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv) -#endif /* __HEXAGON_ARCH___ >= 69 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vadd(Vu32.bf,Vv32.bf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VbfVbf(HVX_Vector Vu, @@ -5187,7 +4583,7 @@ #define Q6_Wsf_vadd_VbfVbf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_bf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5199,7 +4595,7 @@ #define Q6_Vh_equals_Vhf(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5211,7 +4607,7 @@ #define Q6_Vhf_equals_Vh(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_h)(Vu) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5223,7 +4619,7 @@ #define Q6_Vsf_equals_Vw(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_w)(Vu) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5235,9 +4631,9 @@ #define Q6_Vw_equals_Vsf(Vu) \ 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_w_sf)(Vu) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.bf=vcvt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vbf_vcvt_VsfVsf(HVX_Vector Vu, @@ -5246,7 +4642,7 @@ #define Q6_Vbf_vcvt_VsfVsf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_bf_sf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5258,7 +4654,7 @@ #define Q6_Q_vcmp_gt_VbfVbf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) \ ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf)(Vu, Vv)), -1) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5274,7 +4670,7 @@ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ Vv)), \ -1) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5290,7 +4686,7 @@ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ Vv)), \ -1) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 73 /* ========================================================================== @@ -5306,9 +4702,9 @@ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ Vv)), \ -1) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.bf=vmax(Vu32.bf,Vv32.bf) C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmax_VbfVbf(HVX_Vector Vu, @@ -5317,9 +4713,9 @@ #define Q6_Vbf_vmax_VbfVbf(Vu, Vv) \ 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_bf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.bf=vmin(Vu32.bf,Vv32.bf) C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmin_VbfVbf(HVX_Vector Vu, @@ -5328,9 +4724,9 @@ #define Q6_Vbf_vmin_VbfVbf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_bf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vmpy(Vu32.bf,Vv32.bf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VbfVbf(HVX_Vector Vu, @@ -5339,9 +4735,9 @@ #define Q6_Wsf_vmpy_VbfVbf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vxx32.sf+=vmpy(Vu32.bf,Vv32.bf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVbfVbf(HVX_VectorPair @@ -5351,9 +4747,9 @@ #define Q6_Wsf_vmpyacc_WsfVbfVbf(Vxx, Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf_acc)(Vxx, Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif -#if __HVX_ARCH__ >= 73 +#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.sf=vsub(Vu32.bf,Vv32.bf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VbfVbf(HVX_Vector Vu, @@ -5362,7 +4758,7 @@ #define Q6_Wsf_vsub_VbfVbf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_bf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 73 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ 
-5374,7 +4770,7 @@ #define Q6_V_vgetqfext_VR(Vu, Rt) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext)(Vu, Rt) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ -5386,7 +4782,7 @@ #define Q6_V_vgetqfextor_VVR(Vx, Vu, Rt) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext_oracc)(Vx, Vu, Rt) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ -5398,9 +4794,9 @@ #define Q6_V_vsetqfext_VR(Vu, Rt) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_set_qfext)(Vu, Rt) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.f8=vabs(Vu32.f8) C Intrinsic Prototype: HVX_Vector Q6_V_vabs_V(HVX_Vector Vu) @@ -5409,9 +4805,9 @@ ========================================================================== */ #define Q6_V_vabs_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_f8)(Vu) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.hf=vadd(Vu32.f8,Vv32.f8) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vadd_VV(HVX_Vector Vu, @@ -5420,9 +4816,9 @@ #define Q6_Whf_vadd_VV(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_f8)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.b=vcvt2(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt2_VhfVhf(HVX_Vector Vu, @@ -5431,9 +4827,9 @@ #define Q6_Vb_vcvt2_VhfVhf(Vu, Vv) \ 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_b_hf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt2(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vb(HVX_Vector Vu) @@ -5443,9 +4839,9 @@ #define Q6_Whf_vcvt2_Vb(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_b)(Vu) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt2(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vub(HVX_Vector Vu) @@ -5455,9 +4851,9 @@ #define Q6_Whf_vcvt2_Vub(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_ub)(Vu) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.ub=vcvt2(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt2_VhfVhf(HVX_Vector Vu, @@ -5466,9 +4862,9 @@ #define Q6_Vub_vcvt2_VhfVhf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_ub_hf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.f8=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_V_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector @@ -5477,9 +4873,9 @@ #define Q6_V_vcvt_VhfVhf(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_f8_hf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly 
Syntax: Vdd32.hf=vcvt(Vu32.f8) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_V(HVX_Vector Vu) @@ -5489,9 +4885,9 @@ #define Q6_Whf_vcvt_V(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_f8)(Vu) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.f8=vfmax(Vu32.f8,Vv32.f8) C Intrinsic Prototype: HVX_Vector Q6_V_vfmax_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -5501,9 +4897,9 @@ #define Q6_V_vfmax_VV(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_f8)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.f8=vfmin(Vu32.f8,Vv32.f8) C Intrinsic Prototype: HVX_Vector Q6_V_vfmin_VV(HVX_Vector Vu, HVX_Vector Vv) @@ -5513,9 +4909,9 @@ #define Q6_V_vfmin_VV(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_f8)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vd32.f8=vfneg(Vu32.f8) C Intrinsic Prototype: HVX_Vector Q6_V_vfneg_V(HVX_Vector Vu) @@ -5525,7 +4921,7 @@ #define Q6_V_vfneg_V(Vu) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_f8)(Vu) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ -5536,9 +4932,9 @@ #define Q6_V_vmerge_VVw(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmerge_qf)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: 
Vdd32.hf=vmpy(Vu32.f8,Vv32.f8) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpy_VV(HVX_Vector Vu, @@ -5547,9 +4943,9 @@ #define Q6_Whf_vmpy_VV(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vxx32.hf+=vmpy(Vu32.f8,Vv32.f8) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpyacc_WhfVV(HVX_VectorPair @@ -5559,7 +4955,7 @@ #define Q6_Whf_vmpyacc_WhfVV(Vxx, Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8_acc)(Vxx, Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ -5570,7 +4966,7 @@ #define Q6_Vqf16_vmpy_VhfRhf(Vu, Rt) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_hf)(Vu, Rt) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ -5581,7 +4977,7 @@ #define Q6_Vqf16_vmpy_Vqf16Rhf(Vu, Rt) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_qf16)(Vu, Rt) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif #if __HVX_ARCH__ >= 79 /* ========================================================================== @@ -5592,9 +4988,9 @@ #define Q6_Vqf32_vmpy_VsfRsf(Vu, Rt) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_sf)(Vu, Rt) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif -#if __HVX_ARCH__ >= 79 +#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__ /* ========================================================================== Assembly Syntax: Vdd32.hf=vsub(Vu32.f8,Vv32.f8) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vsub_VV(HVX_Vector Vu, @@ -5603,7 +4999,400 @@ #define Q6_Whf_vsub_VV(Vu, Vv) \ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_f8)(Vu, Vv) -#endif /* __HEXAGON_ARCH___ >= 79 */ +#endif + +#if __HVX_ARCH__ >= 
81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vabs(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vabs_Vhf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_hf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vabs(Vu32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vabs_Vqf16(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_qf16)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vabs(Vu32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vqf32(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vabs_Vqf32(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_qf32)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vabs(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vabs_Vsf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_sf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32=valign4(Vu32,Vv32,Rt8) + C Intrinsic 
Prototype: HVX_Vector Q6_V_valign4_VVR(HVX_Vector Vu, HVX_Vector + Vv, Word32 Rt) Instruction Type: CVI_VA Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_valign4_VVR(Vu, Vv, Rt) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valign4)(Vu, Vv, Rt) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.bf=Vuu32.qf32 + C Intrinsic Prototype: HVX_Vector Q6_Vbf_equals_Wqf32(HVX_VectorPair Vuu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vbf_equals_Wqf32(Vuu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_bf_qf32)(Vuu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.f8=Vu32.qf16 + C Intrinsic Prototype: HVX_Vector Q6_V_equals_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_equals_Vqf16(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_f8_qf16)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.h=Vu32.hf:rnd + C Intrinsic Prototype: HVX_Vector Q6_Vh_equals_Vhf_rnd(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_equals_Vhf_rnd(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf_rnd)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vdd32.qf16=Vu32.f8 + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf16_equals_V(HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + 
========================================================================== */ + +#define Q6_Wqf16_equals_V(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_f8)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=Vu32.hf + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_equals_Vhf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_hf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=Vu32.qf16 + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_equals_Vqf16(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_qf16)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=Vu32.qf32 + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vqf32(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_equals_Vqf32(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_qf32)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=Vu32.sf + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_equals_Vsf(Vu) \ + 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_sf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.eq(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhfVhf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eq_VhfVhf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf)(Vu, Vv)), -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.eq(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhfVhf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqand_QVhfVhf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_and)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.eq(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhfVhf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqor_QVhfVhf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_or)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* 
========================================================================== + Assembly Syntax: Qx4^=vcmp.eq(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhfVhf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqxacc_QVhfVhf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_xor)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.eq(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VsfVsf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eq_VsfVsf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf)(Vu, Vv)), -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.eq(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVsfVsf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqand_QVsfVsf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_and)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: 
Qx4|=vcmp.eq(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVsfVsf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqor_QVsfVsf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_or)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.eq(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVsfVsf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqxacc_QVsfVsf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)( \ + (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_xor)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.w=vilog2(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vilog2_Vhf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_hf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.w=vilog2(Vu32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + 
========================================================================== */ + +#define Q6_Vw_vilog2_Vqf16(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf16)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.w=vilog2(Vu32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf32(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vilog2_Vqf32(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf32)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.w=vilog2(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vilog2_Vsf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_sf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vneg(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vneg_Vhf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_hf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vneg(Vu32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vneg_Vqf16(Vu) \ + 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_qf16)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vneg(Vu32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vqf32(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vneg_Vqf32(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_qf32)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vneg(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vneg_Vsf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_sf)(Vu) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.hf,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVqf16(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_VhfVqf16(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_mix)(Vu, Vv) +#endif + +#if __HVX_ARCH__ >= 81 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.sf,Vv32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVqf32(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_VsfVqf32(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_mix)(Vu, Vv) +#endif #endif 
/* __HVX__ */ diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h index 35f012cc70..19064a4ff5 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) { #include -#include - #include -#include - #include #include -#include - -#include - -#include - -#include - #include #include diff --git a/lib/include/intrin.h b/lib/include/intrin.h index 588c283cbd..210ed0c1f7 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -30,6 +30,10 @@ #include #endif +#if defined(__ARM_ACLE) +#include +#endif + /* For the definition of jmp_buf. */ #if __STDC_HOSTED__ #include diff --git a/lib/include/lasxintrin.h b/lib/include/lasxintrin.h index 85020d8282..83cc4288a9 100644 --- a/lib/include/lasxintrin.h +++ b/lib/include/lasxintrin.h @@ -10,6 +10,8 @@ #ifndef _LOONGSON_ASXINTRIN_H #define _LOONGSON_ASXINTRIN_H 1 +#include + #if defined(__loongarch_asx) typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); @@ -3882,5 +3884,116 @@ extern __inline #define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) +#if defined(__loongarch_asx_sx_conv) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, + __artificial__)) __m256 __lasx_cast_128_s(__m128 _1) { + return (__m256)__builtin_lasx_cast_128_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_cast_128_d(__m128d _1) { + return (__m256d)__builtin_lasx_cast_128_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_cast_128(__m128i _1) { + return (__m256i)__builtin_lasx_cast_128((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_concat_128_s(__m128 _1, __m128 _2) { + return (__m256)__builtin_lasx_concat_128_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256d + __lasx_concat_128_d(__m128d _1, __m128d _2) { + return (__m256d)__builtin_lasx_concat_128_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_concat_128(__m128i _1, __m128i _2) { + return (__m256i)__builtin_lasx_concat_128((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lasx_extract_128_lo_s(__m256 _1) { + return (__m128)__builtin_lasx_extract_128_lo_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lasx_extract_128_lo_d(__m256d _1) { + return (__m128d)__builtin_lasx_extract_128_lo_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lasx_extract_128_lo(__m256i _1) { + return (__m128i)__builtin_lasx_extract_128_lo((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lasx_extract_128_hi_s(__m256 _1) { + return (__m128)__builtin_lasx_extract_128_hi_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lasx_extract_128_hi_d(__m256d _1) { + return (__m128d)__builtin_lasx_extract_128_hi_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lasx_extract_128_hi(__m256i _1) { + return (__m128i)__builtin_lasx_extract_128_hi((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_insert_128_lo_s(__m256 _1, __m128 _2) { + return (__m256)__builtin_lasx_insert_128_lo_s((v8f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_insert_128_lo_d(__m256d _1, __m128d _2) { + return (__m256d)__builtin_lasx_insert_128_lo_d((v4f64)_1, 
(v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_insert_128_lo(__m256i _1, __m128i _2) { + return (__m256i)__builtin_lasx_insert_128_lo((v4i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_insert_128_hi_s(__m256 _1, __m128 _2) { + return (__m256)__builtin_lasx_insert_128_hi_s((v8f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_insert_128_hi_d(__m256d _1, __m128d _2) { + return (__m256d)__builtin_lasx_insert_128_hi_d((v4f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_insert_128_hi(__m256i _1, __m128i _2) { + return (__m256i)__builtin_lasx_insert_128_hi((v4i64)_1, (v2i64)_2); +} + +#endif /* defined(__loongarch_asx_sx_conv). */ #endif /* defined(__loongarch_asx). */ #endif /* _LOONGSON_ASXINTRIN_H. */ diff --git a/lib/include/llvm_libc_wrappers/assert.h b/lib/include/llvm_libc_wrappers/assert.h index 610ed96a45..7eadb2c354 100644 --- a/lib/include/llvm_libc_wrappers/assert.h +++ b/lib/include/llvm_libc_wrappers/assert.h @@ -19,13 +19,11 @@ #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) +#else +#define __LIBC_ATTRS #endif -#pragma omp begin declare target - -#include - -#pragma omp end declare target +// TODO: Define these for CUDA / HIP. #undef __LIBC_ATTRS diff --git a/lib/include/llvm_libc_wrappers/ctype.h b/lib/include/llvm_libc_wrappers/ctype.h index 960cf43302..79b0c1e9be 100644 --- a/lib/include/llvm_libc_wrappers/ctype.h +++ b/lib/include/llvm_libc_wrappers/ctype.h @@ -13,128 +13,16 @@ #error "This file is for GPU offloading compilation only" #endif -// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is -// necessary to prevent it from doing that and remapping our implementation. 
-#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__) -#pragma push_macro("__USE_EXTERN_INLINES") -#undef __USE_EXTERN_INLINES -#endif - #include_next -#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__) -#pragma pop_macro("__USE_EXTERN_INLINES") -#endif - -#if __has_include() - #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) +#else +#define __LIBC_ATTRS #endif -// The GNU headers like to provide these as macros, we need to undefine them so -// they do not conflict with the following definitions for the GPU. - -#pragma push_macro("isalnum") -#pragma push_macro("isalpha") -#pragma push_macro("isascii") -#pragma push_macro("isblank") -#pragma push_macro("iscntrl") -#pragma push_macro("isdigit") -#pragma push_macro("isgraph") -#pragma push_macro("islower") -#pragma push_macro("isprint") -#pragma push_macro("ispunct") -#pragma push_macro("isspace") -#pragma push_macro("isupper") -#pragma push_macro("isxdigit") -#pragma push_macro("toascii") -#pragma push_macro("tolower") -#pragma push_macro("toupper") -#pragma push_macro("isalnum_l") -#pragma push_macro("isalpha_l") -#pragma push_macro("isascii_l") -#pragma push_macro("isblank_l") -#pragma push_macro("iscntrl_l") -#pragma push_macro("isdigit_l") -#pragma push_macro("isgraph_l") -#pragma push_macro("islower_l") -#pragma push_macro("isprint_l") -#pragma push_macro("ispunct_l") -#pragma push_macro("isspace_l") -#pragma push_macro("isupper_l") -#pragma push_macro("isxdigit_l") - -#undef isalnum -#undef isalpha -#undef isascii -#undef iscntrl -#undef isdigit -#undef islower -#undef isgraph -#undef isprint -#undef ispunct -#undef isspace -#undef isupper -#undef isblank -#undef isxdigit -#undef toascii -#undef tolower -#undef toupper -#undef isalnum_l -#undef isalpha_l -#undef iscntrl_l -#undef isdigit_l -#undef islower_l -#undef isgraph_l -#undef isprint_l -#undef ispunct_l -#undef isspace_l -#undef isupper_l -#undef isblank_l -#undef isxdigit_l - 
-#pragma omp begin declare target - -#include - -#pragma omp end declare target - -// Restore the original macros when compiling on the host. -#if !defined(__NVPTX__) && !defined(__AMDGPU__) -#pragma pop_macro("isalnum") -#pragma pop_macro("isalpha") -#pragma pop_macro("isascii") -#pragma pop_macro("isblank") -#pragma pop_macro("iscntrl") -#pragma pop_macro("isdigit") -#pragma pop_macro("isgraph") -#pragma pop_macro("islower") -#pragma pop_macro("isprint") -#pragma pop_macro("ispunct") -#pragma pop_macro("isspace") -#pragma pop_macro("isupper") -#pragma pop_macro("isxdigit") -#pragma pop_macro("toascii") -#pragma pop_macro("tolower") -#pragma pop_macro("toupper") -#pragma pop_macro("isalnum_l") -#pragma pop_macro("isalpha_l") -#pragma pop_macro("isascii_l") -#pragma pop_macro("isblank_l") -#pragma pop_macro("iscntrl_l") -#pragma pop_macro("isdigit_l") -#pragma pop_macro("isgraph_l") -#pragma pop_macro("islower_l") -#pragma pop_macro("isprint_l") -#pragma pop_macro("ispunct_l") -#pragma pop_macro("isspace_l") -#pragma pop_macro("isupper_l") -#pragma pop_macro("isxdigit_l") -#endif +// TODO: Define these for CUDA / HIP. #undef __LIBC_ATTRS -#endif - #endif // __CLANG_LLVM_LIBC_WRAPPERS_CTYPE_H__ diff --git a/lib/include/llvm_libc_wrappers/inttypes.h b/lib/include/llvm_libc_wrappers/inttypes.h index 415f1e4b7b..2261389824 100644 --- a/lib/include/llvm_libc_wrappers/inttypes.h +++ b/lib/include/llvm_libc_wrappers/inttypes.h @@ -19,13 +19,11 @@ #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) +#else +#define __LIBC_ATTRS #endif -#pragma omp begin declare target - -#include - -#pragma omp end declare target +// TODO: Define these for CUDA / HIP. 
#undef __LIBC_ATTRS diff --git a/lib/include/llvm_libc_wrappers/stdio.h b/lib/include/llvm_libc_wrappers/stdio.h index 950f91b376..0c3e44823d 100644 --- a/lib/include/llvm_libc_wrappers/stdio.h +++ b/lib/include/llvm_libc_wrappers/stdio.h @@ -6,45 +6,19 @@ // //===----------------------------------------------------------------------===// +#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ +#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ + #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next -// In some old versions of glibc, other standard headers sometimes define -// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h -// to produce special definitions. Future includes of stdio.h when those -// special macros are undefined are expected to produce the normal definitions -// from stdio.h. -// -// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__) -// unconditionally to the above include_next. Otherwise, after an occurrence of -// the first glibc stdio.h use case described above, the include_next would be -// skipped for remaining includes of stdio.h, leaving required symbols -// undefined. -// -// We make the following assumptions to handle all use cases: -// -// 1. If the above include_next produces special glibc definitions, then (a) it -// does not produce the normal definitions that we must intercept below, (b) -// the current file was included from a glibc header that already defined -// __GLIBC__ (usually by including glibc's ), and (c) the above -// include_next does not define _STDIO_H. In that case, we skip the rest of -// the current file and don't guard against future includes. -// 2. 
If the above include_next produces the normal stdio.h definitions, then -// either (a) __GLIBC__ is not defined because C headers are from some other -// libc implementation or (b) the above include_next defines _STDIO_H to -// prevent the above include_next from having any effect in the future. -#if !defined(__GLIBC__) || defined(_STDIO_H) - -#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ -#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ - -#if __has_include() - #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) +#else +#define __LIBC_ATTRS #endif // Some headers provide these as macros. Temporarily undefine them so they do @@ -60,21 +34,19 @@ #pragma omp begin declare target -#include +__LIBC_ATTRS extern FILE *stderr; +__LIBC_ATTRS extern FILE *stdin; +__LIBC_ATTRS extern FILE *stdout; #pragma omp end declare target -#undef __LIBC_ATTRS - // Restore the original macros when compiling on the host. #if !defined(__NVPTX__) && !defined(__AMDGPU__) -#pragma pop_macro("stdout") #pragma pop_macro("stderr") #pragma pop_macro("stdin") +#pragma pop_macro("stdout") #endif -#endif +#undef __LIBC_ATTRS #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ - -#endif diff --git a/lib/include/llvm_libc_wrappers/stdlib.h b/lib/include/llvm_libc_wrappers/stdlib.h index 1da22abd0b..7af5e2ebe0 100644 --- a/lib/include/llvm_libc_wrappers/stdlib.h +++ b/lib/include/llvm_libc_wrappers/stdlib.h @@ -15,39 +15,18 @@ #include_next -#if __has_include() - #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) +#else +#define __LIBC_ATTRS #endif #pragma omp begin declare target -// The LLVM C library uses these named types so we forward declare them. 
-typedef void (*__atexithandler_t)(void); -typedef int (*__search_compare_t)(const void *, const void *); -typedef int (*__qsortcompare_t)(const void *, const void *); -typedef int (*__qsortrcompare_t)(const void *, const void *, void *); - -// Enforce ABI compatibility with the structs used by the LLVM C library. -_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!"); -_Static_assert(__builtin_offsetof(ldiv_t, quot) == 0, "ABI mismatch!"); -_Static_assert(__builtin_offsetof(lldiv_t, quot) == 0, "ABI mismatch!"); - -#if defined(__GLIBC__) && __cplusplus >= 201703L -#define at_quick_exit atexit -#endif - -#include - -#if defined(__GLIBC__) && __cplusplus >= 201703L -#undef at_quick_exit -#endif +// TODO: Define these for CUDA / HIP. #pragma omp end declare target #undef __LIBC_ATTRS -#endif - #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDLIB_H__ diff --git a/lib/include/llvm_libc_wrappers/string.h b/lib/include/llvm_libc_wrappers/string.h index 0ea49cb137..766a58f5b6 100644 --- a/lib/include/llvm_libc_wrappers/string.h +++ b/lib/include/llvm_libc_wrappers/string.h @@ -15,82 +15,14 @@ #include_next -#if __has_include() - #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) -#endif - -#pragma omp begin declare target - -// The GNU headers provide C++ standard compliant headers when in C++ mode and -// the LLVM libc does not. We need to manually provide the definitions using the -// same prototypes. 
-#if defined(__cplusplus) && defined(__GLIBC__) && \ - defined(__CORRECT_ISO_CPP_STRING_H_PROTO) - -#ifndef __LIBC_ATTRS +#else #define __LIBC_ATTRS #endif -extern "C" { -void *memccpy(void *__restrict, const void *__restrict, int, - size_t) __LIBC_ATTRS; -int memcmp(const void *, const void *, size_t) __LIBC_ATTRS; -void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS; -void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS; -void *memmove(void *, const void *, size_t) __LIBC_ATTRS; -void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS; -void *memset(void *, int, size_t) __LIBC_ATTRS; -char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS; -char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; -char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS; -int strcmp(const char *, const char *) __LIBC_ATTRS; -int strcoll(const char *, const char *) __LIBC_ATTRS; -char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS; -size_t strcspn(const char *, const char *) __LIBC_ATTRS; -char *strdup(const char *) __LIBC_ATTRS; -size_t strlen(const char *) __LIBC_ATTRS; -char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; -int strncmp(const char *, const char *, size_t) __LIBC_ATTRS; -char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; -char *strndup(const char *, size_t) __LIBC_ATTRS; -size_t strnlen(const char *, size_t) __LIBC_ATTRS; -size_t strspn(const char *, const char *) __LIBC_ATTRS; -char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS; -char *strtok_r(char *__restrict, const char *__restrict, - char **__restrict) __LIBC_ATTRS; -size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS; -} - -extern "C++" { -char *strstr(char *, const char *) noexcept __LIBC_ATTRS; -const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS; -char *strpbrk(char *, const 
char *) noexcept __LIBC_ATTRS; -const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS; -char *strrchr(char *, int) noexcept __LIBC_ATTRS; -const char *strrchr(const char *, int) noexcept __LIBC_ATTRS; -char *strchr(char *, int) noexcept __LIBC_ATTRS; -const char *strchr(const char *, int) noexcept __LIBC_ATTRS; -char *strchrnul(char *, int) noexcept __LIBC_ATTRS; -const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS; -char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS; -const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS; -void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; -const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; -void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; -const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS; -} - -#else -#include - -#endif - -#pragma omp end declare target +// TODO: Define these for CUDA / HIP. #undef __LIBC_ATTRS -#endif - #endif // __CLANG_LLVM_LIBC_WRAPPERS_STRING_H__ diff --git a/lib/include/llvm_libc_wrappers/time.h b/lib/include/llvm_libc_wrappers/time.h index 9d1340c4eb..d38eea327a 100644 --- a/lib/include/llvm_libc_wrappers/time.h +++ b/lib/include/llvm_libc_wrappers/time.h @@ -15,20 +15,14 @@ #include_next -#if __has_include() - #if defined(__HIP__) || defined(__CUDA__) #define __LIBC_ATTRS __attribute__((device)) +#else +#define __LIBC_ATTRS #endif -#pragma omp begin declare target +// TODO: Define these for CUDA / HIP. 
-_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!"); - -#include - -#pragma omp end declare target - -#endif +#undef __LIBC_ATTRS #endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__ diff --git a/lib/include/mmintrin.h b/lib/include/mmintrin.h index dc0fa5c523..2cf46455d7 100644 --- a/lib/include/mmintrin.h +++ b/lib/include/mmintrin.h @@ -39,27 +39,21 @@ typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); /* Define the default attributes for the functions in this file. */ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) +#if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_SSE2 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("sse2,no-evex512"), __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ + __min_vector_width__(128))) constexpr #else #define __DEFAULT_FN_ATTRS_SSE2 \ __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ __min_vector_width__(128))) #endif -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr -#else -#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 -#endif - #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) -#define __anyext128(x) \ +#define __zext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ - 1, -1, -1) + 1, 2, 3) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. @@ -68,9 +62,9 @@ typedef char __v16qi __attribute__((__vector_size__(16))); /// /// This intrinsic corresponds to the EMMS instruction. 
/// -static __inline__ void __attribute__((__always_inline__, __nodebug__, - __target__("mmx,no-evex512"))) -_mm_empty(void) { +static __inline__ void + __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) + _mm_empty(void) { __builtin_ia32_emms(); } @@ -85,10 +79,8 @@ _mm_empty(void) { /// A 32-bit integer value. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the /// parameter. The upper 32 bits are set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cvtsi32_si64(int __i) -{ - return __extension__ (__m64)(__v2si){__i, 0}; +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i) { + return __extension__(__m64)(__v2si){__i, 0}; } /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit @@ -102,10 +94,8 @@ _mm_cvtsi32_si64(int __i) /// A 64-bit integer vector. /// \returns A 32-bit signed integer value containing the lower 32 bits of the /// parameter. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 -_mm_cvtsi64_si32(__m64 __m) -{ - return ((__v2si)__m)[0]; +static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m) { + return ((__v2si)__m)[0]; } /// Casts a 64-bit signed integer value into a 64-bit integer vector. @@ -118,10 +108,8 @@ _mm_cvtsi64_si32(__m64 __m) /// A 64-bit signed integer. /// \returns A 64-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cvtsi64_m64(long long __i) -{ - return (__m64)__i; +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i) { + return __extension__(__m64)(__v1di){__i}; } /// Casts a 64-bit integer vector into a 64-bit signed integer value. @@ -134,10 +122,8 @@ _mm_cvtsi64_m64(long long __i) /// A 64-bit integer vector. /// \returns A 64-bit signed integer containing the same bitwise pattern as the /// parameter. 
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 -_mm_cvtm64_si64(__m64 __m) -{ - return (long long)__m; +static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m) { + return ((__v1di)__m)[0]; } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer @@ -159,11 +145,10 @@ _mm_cvtm64_si64(__m64 __m) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_packs_pi16(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_packsswb128( - (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1, + __m64 __m2) { + return __trunc64(__builtin_ia32_packsswb128( + (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); } /// Converts, with saturation, 32-bit signed integers from both 64-bit integer @@ -185,11 +170,10 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_packs_pi32(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_packssdw128( - (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1, + __m64 __m2) { + return __trunc64(__builtin_ia32_packssdw128( + (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer @@ -211,11 +195,10 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_packs_pu16(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_packuswb128( - (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1, + __m64 __m2) { + return __trunc64(__builtin_ia32_packuswb128( + (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] @@ -239,11 +222,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) /// Bits [63:56] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, - 4, 12, 5, 13, 6, 14, 7, 15); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5, + 13, 6, 14, 7, 15); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -263,11 +245,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, - 2, 6, 3, 7); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -285,10 +265,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] @@ -312,11 +291,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, - 0, 8, 1, 9, 2, 10, 3, 11); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9, + 2, 10, 3, 11); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -336,11 +314,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, - 0, 4, 1, 5); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -358,10 +334,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); } /// Adds each 8-bit integer element of the first 64-bit integer vector @@ -379,10 +354,9 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_add_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2)); } /// Adds each 16-bit integer element of the first 64-bit integer vector @@ -400,10 +374,9 @@ _mm_add_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_add_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2)); } /// Adds each 32-bit integer element of the first 64-bit integer vector @@ -421,10 +394,9 @@ _mm_add_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both /// parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_add_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v2su)__m1) + ((__v2su)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v2su)__m1) + ((__v2su)__m2)); } /// Adds, with saturation, each 8-bit signed integer element of the first @@ -445,10 +417,9 @@ _mm_add_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); } /// Adds, with saturation, each 16-bit signed integer element of the first @@ -469,10 +440,9 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); } /// Adds, with saturation, each 8-bit unsigned integer element of the first @@ -492,10 +462,9 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// unsigned sums of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pu8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2); } /// Adds, with saturation, each 16-bit unsigned integer element of the first @@ -515,10 +484,9 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// unsigned sums of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pu16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2); } /// Subtracts each 8-bit integer element of the second 64-bit integer @@ -536,10 +504,9 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_sub_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2)); } /// Subtracts each 16-bit integer element of the second 64-bit integer @@ -557,10 +524,9 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of /// both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_sub_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2)); } /// Subtracts each 32-bit integer element of the second 64-bit integer @@ -578,10 +544,9 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32] containing the subtrahends. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_sub_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v2su)__m1) - ((__v2su)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v2su)__m1) - ((__v2su)__m2)); } /// Subtracts, with saturation, each 8-bit signed integer element of the second @@ -602,10 +567,9 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); } /// Subtracts, with saturation, each 16-bit signed integer element of the @@ -626,10 +590,9 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit unsigned integer element of the second 64-bit @@ -650,10 +613,9 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pu8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2); } /// Subtracts each 16-bit unsigned integer element of the second 64-bit @@ -674,10 +636,9 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pu16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1, + __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -701,11 +662,10 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of /// products of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_madd_pi16(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1), - (__v8hi)__anyext128(__m2))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1, + __m64 __m2) { + return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__zext128(__m1), + (__v8hi)__zext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -723,11 +683,10 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_mulhi_pi16(__m64 __m1, __m64 __m2) -{ - return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1), - (__v8hi)__anyext128(__m2))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1, + __m64 __m2) { + return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1), + (__v8hi)__zext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -745,10 +704,9 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits /// of the products of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_mullo_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2)); } /// Left-shifts each 16-bit signed integer element of the first @@ -771,8 +729,8 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi16(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m), - (__v8hi)__anyext128(__count))); + return __trunc64(__builtin_ia32_psllw128((__v8hi)__zext128(__m), + (__v8hi)__zext128(__count))); } /// Left-shifts each 16-bit signed integer element of a 64-bit integer @@ -791,11 +749,9 @@ _mm_sll_pi16(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count)); } /// Left-shifts each 32-bit signed integer element of the first @@ -818,8 +774,8 @@ _mm_slli_pi16(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi32(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m), - (__v4si)__anyext128(__count))); + return __trunc64(__builtin_ia32_pslld128((__v4si)__zext128(__m), + (__v4si)__zext128(__count))); } /// Left-shifts each 32-bit signed integer element of a 64-bit integer @@ -838,11 +794,9 @@ _mm_sll_pi32(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. 
If \a __count is greater or equal to 32, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count)); } /// Left-shifts the first 64-bit integer parameter by the number of bits @@ -862,8 +816,8 @@ _mm_slli_pi32(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_si64(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m), - (__v2di)__anyext128(__count))); + return __trunc64(__builtin_ia32_psllq128((__v2di)__zext128(__m), + (__v2di)__zext128(__count))); } /// Left-shifts the first parameter, which is a 64-bit integer, by the @@ -880,11 +834,9 @@ _mm_sll_si64(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_si64(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -908,8 +860,8 @@ _mm_slli_si64(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi16(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m), - (__v8hi)__anyext128(__count))); + return __trunc64(__builtin_ia32_psraw128((__v8hi)__zext128(__m), + (__v8hi)__zext128(__count))); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector @@ -929,11 +881,9 @@ _mm_sra_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srai_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -957,8 +907,8 @@ _mm_srai_pi16(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi32(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m), - (__v4si)__anyext128(__count))); + return __trunc64(__builtin_ia32_psrad128((__v4si)__zext128(__m), + (__v4si)__zext128(__count))); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector @@ -978,11 +928,9 @@ _mm_sra_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. 
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srai_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -1005,8 +953,8 @@ _mm_srai_pi32(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi16(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m), - (__v8hi)__anyext128(__count))); + return __trunc64(__builtin_ia32_psrlw128((__v8hi)__zext128(__m), + (__v8hi)__zext128(__count))); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector @@ -1025,11 +973,9 @@ _mm_srl_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -1052,8 +998,8 @@ _mm_srli_pi16(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi32(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m), - (__v4si)__anyext128(__count))); + return __trunc64(__builtin_ia32_psrld128((__v4si)__zext128(__m), + (__v4si)__zext128(__count))); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector @@ -1072,11 +1018,9 @@ _mm_srl_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count)); } /// Right-shifts the first 64-bit integer parameter by the number of bits @@ -1096,8 +1040,8 @@ _mm_srli_pi32(__m64 __m, int __count) static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_si64(__m64 __m, __m64 __count) { - return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m), - (__v2di)__anyext128(__count))); + return __trunc64(__builtin_ia32_psrlq128((__v2di)__zext128(__m), + (__v2di)__zext128(__count))); } /// Right-shifts the first parameter, which is a 64-bit integer, by the @@ -1115,11 +1059,9 @@ _mm_srl_si64(__m64 __m, __m64 __count) /// \param __count /// A 32-bit integer value. 
/// \returns A 64-bit integer vector containing the right-shifted value. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_si64(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m, + int __count) { + return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count)); } /// Performs a bitwise AND of two 64-bit integer vectors. @@ -1134,10 +1076,9 @@ _mm_srli_si64(__m64 __m, int __count) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_and_si64(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v1du)__m1) & ((__v1du)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v1du)__m1) & ((__v1du)__m2)); } /// Performs a bitwise NOT of the first 64-bit integer vector, and then @@ -1155,10 +1096,9 @@ _mm_and_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of the second /// parameter and the one's complement of the first parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_andnot_si64(__m64 __m1, __m64 __m2) -{ - return (__m64)(~((__v1du)__m1) & ((__v1du)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1, + __m64 __m2) { + return (__m64)(~((__v1du)__m1) & ((__v1du)__m2)); } /// Performs a bitwise OR of two 64-bit integer vectors. @@ -1173,10 +1113,9 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise OR of both /// parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_or_si64(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v1du)__m1) | ((__v1du)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v1du)__m1) | ((__v1du)__m2)); } /// Performs a bitwise exclusive OR of two 64-bit integer vectors. @@ -1191,10 +1130,9 @@ _mm_or_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_xor_si64(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2)); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of @@ -1213,10 +1151,9 @@ _mm_xor_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2)); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of @@ -1235,10 +1172,9 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2)); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of @@ -1257,10 +1193,9 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)(((__v2si)__m1) == ((__v2si)__m2)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1, + __m64 __m2) { + return (__m64)(((__v2si)__m1) == ((__v2si)__m2)); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of @@ -1279,9 +1214,8 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1, + __m64 __m2) { /* This function always performs a signed comparison, but __v8qi is a char which may be signed or unsigned, so use __v8qs. */ return (__m64)((__v8qs)__m1 > (__v8qs)__m2); @@ -1303,10 +1237,9 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)((__v4hi)__m1 > (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1, + __m64 __m2) { + return (__m64)((__v4hi)__m1 > (__v4hi)__m2); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of @@ -1325,10 +1258,9 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)((__v2si)__m1 > (__v2si)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1, + __m64 __m2) { + return (__m64)((__v2si)__m1 > (__v2si)__m2); } /// Constructs a 64-bit integer vector initialized to zero. @@ -1338,8 +1270,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) /// This intrinsic corresponds to the PXOR instruction. /// /// \returns An initialized 64-bit integer vector with all elements set to zero. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_setzero_si64(void) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void) { return __extension__(__m64){0LL}; } @@ -1358,8 +1289,8 @@ _mm_setzero_si64(void) { /// A 32-bit integer value used to initialize the lower 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_set_pi32(int __i1, int __i0) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1, + int __i0) { return __extension__(__m64)(__v2si){__i0, __i1}; } @@ -1380,8 +1311,10 @@ _mm_set_pi32(int __i1, int __i0) { /// \param __s0 /// A 16-bit integer value used to initialize bits [15:0] of the result. /// \returns An initialized 64-bit integer vector. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3, + short __s2, + short __s1, + short __s0) { return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3}; } @@ -1410,7 +1343,7 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { /// \param __b0 /// An 8-bit integer value used to initialize bits [7:0] of the result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3, @@ -1430,8 +1363,7 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, /// A 32-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [2 x i32]. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_set1_pi32(int __i) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i) { return _mm_set_pi32(__i, __i); } @@ -1448,8 +1380,7 @@ _mm_set1_pi32(int __i) { /// A 16-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [4 x i16]. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_set1_pi16(short __w) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w) { return _mm_set_pi16(__w, __w, __w, __w); } @@ -1465,8 +1396,7 @@ _mm_set1_pi16(short __w) { /// An 8-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [8 x i8]. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_set1_pi8(char __b) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b) { return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); } @@ -1485,8 +1415,8 @@ _mm_set1_pi8(char __b) { /// A 32-bit integer value used to initialize the upper 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_setr_pi32(int __i0, int __i1) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0, + int __i1) { return _mm_set_pi32(__i1, __i0); } @@ -1507,8 +1437,10 @@ _mm_setr_pi32(int __i0, int __i1) { /// \param __w3 /// A 16-bit integer value used to initialize bits [63:48] of the result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0, + short __w1, + short __w2, + short __w3) { return _mm_set_pi16(__w3, __w2, __w1, __w0); } @@ -1537,13 +1469,12 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { /// \param __b7 /// An 8-bit integer value used to initialize bits [63:56] of the result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7) { return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } -#undef __anyext128 #undef __trunc64 #undef __DEFAULT_FN_ATTRS_SSE2 diff --git a/lib/include/module.modulemap b/lib/include/module.modulemap index a72828625a..c13dd3fd48 100644 --- a/lib/include/module.modulemap +++ b/lib/include/module.modulemap @@ -171,8 +171,22 @@ module _Builtin_intrinsics [system] [extern_c] { // that module. 
The system float.h (if present) will be treated // as a textual header in the sytem module. module _Builtin_float [system] { - header "float.h" - export * + textual header "float.h" + + explicit module float { + header "__float_float.h" + export * + } + + explicit module header_macro { + header "__float_header_macro.h" + export * + } + + explicit module infinity_nan { + header "__float_infinity_nan.h" + export * + } } module _Builtin_inttypes [system] { @@ -239,6 +253,11 @@ module _Builtin_stdbool [system] { export * } +module _Builtin_stdckdint [system] { + header "stdckdint.h" + export * +} + module _Builtin_stdcountof [system] { header "stdcountof.h" export * @@ -329,13 +348,13 @@ module _Builtin_unwind [system] { } // End -fbuiltin-headers-in-system-modules affected modules -module opencl_c { +module opencl_c [system] { requires opencl header "opencl-c.h" header "opencl-c-base.h" } -module ptrauth { +module ptrauth [system] { header "ptrauth.h" export * } diff --git a/lib/include/movrs_avx10_2_512intrin.h b/lib/include/movrs_avx10_2_512intrin.h index 5cd907a597..75d7ce93db 100644 --- a/lib/include/movrs_avx10_2_512intrin.h +++ b/lib/include/movrs_avx10_2_512intrin.h @@ -17,8 +17,8 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("movrs, avx10.2-512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("movrs, avx10.2"), \ + __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_loadrs_epi8(void const *__A) { diff --git a/lib/include/movrs_avx10_2intrin.h b/lib/include/movrs_avx10_2intrin.h index 27b625b6b4..1c78b214fd 100644 --- a/lib/include/movrs_avx10_2intrin.h +++ b/lib/include/movrs_avx10_2intrin.h @@ -17,11 +17,11 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("movrs,avx10.2-256"), __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, __target__("movrs,avx10.2"), \ + __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("movrs,avx10.2-256"), __min_vector_width__(256))) + __attribute__((__always_inline__, __nodebug__, __target__("movrs,avx10.2"), \ + __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_loadrs_epi8(void const *__A) { diff --git a/lib/include/pmmintrin.h b/lib/include/pmmintrin.h index cd605df7fb..a9a6544036 100644 --- a/lib/include/pmmintrin.h +++ b/lib/include/pmmintrin.h @@ -17,15 +17,9 @@ #include /* Define the default attributes for the functions in this file. */ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("sse3,no-evex512"), __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse3"), \ __min_vector_width__(128))) -#endif #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr @@ -66,9 +60,8 @@ _mm_lddqu_si128(__m128i_u const *__p) /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_addsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } @@ -89,9 +82,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hadd_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } @@ -112,9 +104,8 @@ _mm_hadd_ps(__m128 __a, __m128 __b) /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } @@ -174,9 +165,8 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_addsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } @@ -197,9 +187,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b) /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hadd_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } @@ -220,9 +209,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b) /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } diff --git a/lib/include/ptrauth.h b/lib/include/ptrauth.h index 7f7d387cbd..ad28f06f09 100644 --- a/lib/include/ptrauth.h +++ b/lib/include/ptrauth.h @@ -95,7 +95,7 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; __ptrauth qualifier; the compiler will perform this check automatically. */ -#if __has_feature(ptrauth_intrinsics) +#if __has_feature(ptrauth_intrinsics) || defined(__PTRAUTH__) /* Strip the signature from a value without authenticating it. @@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define ptrauth_type_discriminator(__type) \ __builtin_ptrauth_type_discriminator(__type) +/* Compute the constant discriminator used by Clang to sign pointers with the + given C function pointer type. + + A call to this function is an integer constant expression. */ +#if __has_feature(ptrauth_function_pointer_type_discrimination) +#define ptrauth_function_pointer_type_discriminator(__type) \ + __builtin_ptrauth_type_discriminator(__type) +#else +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) +#endif + /* Compute a signature for the given pair of pointer-sized values. The order of the arguments is significant. 
@@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; }) #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0) +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) #define ptrauth_sign_generic_data(__value, __data) \ ({ \ @@ -388,6 +402,6 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define __ptrauth_objc_isa_uintptr #define __ptrauth_objc_super_pointer -#endif /* __has_feature(ptrauth_intrinsics) */ +#endif /* __has_feature(ptrauth_intrinsics) || defined(__PTRAUTH__) */ #endif /* __PTRAUTH_H */ diff --git a/lib/include/riscv_mips.h b/lib/include/riscv_mips.h new file mode 100644 index 0000000000..124a989280 --- /dev/null +++ b/lib/include/riscv_mips.h @@ -0,0 +1,34 @@ +//===----- riscv_mips.h - RISC-V MIPS Intrinsic definitions +//----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __RISCV_MIPS_H +#define __RISCV_MIPS_H + +#if !defined(__riscv) +#error "This header is only meant to be used on riscv architecture" +#endif + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("xmipsexectl"))) + +static __inline__ void __DEFAULT_FN_ATTRS __mips_pause() { + __builtin_riscv_mips_pause(); +} + +static __inline__ void __DEFAULT_FN_ATTRS __mips_ehb() { + __builtin_riscv_mips_ehb(); +} + +static __inline__ void __DEFAULT_FN_ATTRS __mips_ihb() { + __builtin_riscv_mips_ihb(); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/include/riscv_nds.h b/lib/include/riscv_nds.h new file mode 100644 index 0000000000..29734c4383 --- /dev/null +++ b/lib/include/riscv_nds.h @@ -0,0 +1,89 @@ +/*===---- riscv_nds.h - Andes intrinsics -----------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RISCV_NDS_H +#define __RISCV_NDS_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +#if defined(__riscv_xandesperf) + +#if __riscv_xlen == 32 + +static __inline__ int32_t __DEFAULT_FN_ATTRS __riscv_nds_ffb_32(uint32_t __a, + uint32_t __b) { + return __builtin_riscv_nds_ffb_32(__a, __b); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS +__riscv_nds_ffzmism_32(uint32_t __a, uint32_t __b) { + return __builtin_riscv_nds_ffzmism_32(__a, __b); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS +__riscv_nds_ffmism_32(uint32_t __a, uint32_t __b) { + return __builtin_riscv_nds_ffmism_32(__a, __b); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS +__riscv_nds_flmism_32(uint32_t __a, uint32_t __b) { + return __builtin_riscv_nds_flmism_32(__a, __b); +} + +#endif + +#if __riscv_xlen == 64 + +static __inline__ int64_t __DEFAULT_FN_ATTRS __riscv_nds_ffb_64(uint64_t __a, + uint64_t __b) { + return __builtin_riscv_nds_ffb_64(__a, __b); +} + +static __inline__ int64_t __DEFAULT_FN_ATTRS +__riscv_nds_ffzmism_64(uint64_t __a, uint64_t __b) { + return __builtin_riscv_nds_ffzmism_64(__a, __b); +} + +static __inline__ int64_t __DEFAULT_FN_ATTRS +__riscv_nds_ffmism_64(uint64_t __a, uint64_t __b) { + return __builtin_riscv_nds_ffmism_64(__a, __b); +} + +static __inline__ int64_t __DEFAULT_FN_ATTRS +__riscv_nds_flmism_64(uint64_t __a, uint64_t __b) { + return __builtin_riscv_nds_flmism_64(__a, __b); +} + +#endif + +#endif // defined(__riscv_xandesperf) + +#if defined(__riscv_xandesbfhcvt) + +static __inline__ float __DEFAULT_FN_ATTRS __riscv_nds_fcvt_s_bf16(__bf16 bf) { + return __builtin_riscv_nds_fcvt_s_bf16(bf); +} + +static __inline__ __bf16 __DEFAULT_FN_ATTRS __riscv_nds_fcvt_bf16_s(float sf) { + return 
__builtin_riscv_nds_fcvt_bf16_s(sf); +} + +#endif // defined(__riscv_xandesbfhcvt) + +#if defined(__cplusplus) +} +#endif + +#endif // define __RISCV_NDS_H diff --git a/lib/include/sifive_vector.h b/lib/include/sifive_vector.h index 4e67ad6fca..d315eb9609 100644 --- a/lib/include/sifive_vector.h +++ b/lib/include/sifive_vector.h @@ -47,9 +47,9 @@ __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 3, vl) #define __riscv_sf_vc_i_se_u8mf4(p27_26, p24_20, p11_7, simm5, vl) \ - __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl) -#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl) \ __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 6, vl) +#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl) \ + __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl) #define __riscv_sf_vc_i_se_u8m1(p27_26, p24_20, p11_7, simm5, vl) \ __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 0, vl) #define __riscv_sf_vc_i_se_u8m2(p27_26, p24_20, p11_7, simm5, vl) \ @@ -115,4 +115,60 @@ #endif #endif +#define __riscv_sf_vsettnt_e8w1(atn) __riscv_sf_vsettnt(atn, 0, 1); +#define __riscv_sf_vsettnt_e8w2(atn) __riscv_sf_vsettnt(atn, 0, 2); +#define __riscv_sf_vsettnt_e8w4(atn) __riscv_sf_vsettnt(atn, 0, 3); +#define __riscv_sf_vsettnt_e16w1(atn) __riscv_sf_vsettnt(atn, 1, 1); +#define __riscv_sf_vsettnt_e16w2(atn) __riscv_sf_vsettnt(atn, 1, 2); +#define __riscv_sf_vsettnt_e16w4(atn) __riscv_sf_vsettnt(atn, 1, 3); +#define __riscv_sf_vsettnt_e32w1(atn) __riscv_sf_vsettnt(atn, 2, 1); +#define __riscv_sf_vsettnt_e32w2(atn) __riscv_sf_vsettnt(atn, 2, 2); +#define __riscv_sf_vsettm_e8w1(atm) __riscv_sf_vsettm(atm, 0, 1); +#define __riscv_sf_vsettm_e8w2(atm) __riscv_sf_vsettm(atm, 0, 2); +#define __riscv_sf_vsettm_e8w4(atm) __riscv_sf_vsettm(atm, 0, 3); +#define __riscv_sf_vsettm_e16w1(atm) __riscv_sf_vsettm(atm, 1, 1); +#define __riscv_sf_vsettm_e16w2(atm) __riscv_sf_vsettm(atm, 1, 2); +#define __riscv_sf_vsettm_e16w4(atm) __riscv_sf_vsettm(atm, 1, 3); 
+#define __riscv_sf_vsettm_e32w1(atm) __riscv_sf_vsettm(atm, 2, 1); +#define __riscv_sf_vsettm_e32w2(atm) __riscv_sf_vsettm(atm, 2, 2); +#define __riscv_sf_vsettn_e8w1(atn) __riscv_sf_vsettn(atn, 0, 1); +#define __riscv_sf_vsettn_e8w2(atn) __riscv_sf_vsettn(atn, 0, 2); +#define __riscv_sf_vsettn_e8w4(atn) __riscv_sf_vsettn(atn, 0, 3); +#define __riscv_sf_vsettn_e16w1(atn) __riscv_sf_vsettn(atn, 1, 1); +#define __riscv_sf_vsettn_e16w2(atn) __riscv_sf_vsettn(atn, 1, 2); +#define __riscv_sf_vsettn_e16w4(atn) __riscv_sf_vsettn(atn, 1, 3); +#define __riscv_sf_vsettn_e32w1(atn) __riscv_sf_vsettn(atn, 2, 1); +#define __riscv_sf_vsettn_e32w2(atn) __riscv_sf_vsettn(atn, 2, 2); +#define __riscv_sf_vsettk_e8w1(atk) __riscv_sf_vsettk(atk, 0, 1); +#define __riscv_sf_vsettk_e8w2(atk) __riscv_sf_vsettk(atk, 0, 2); +#define __riscv_sf_vsettk_e8w4(atk) __riscv_sf_vsettk(atk, 0, 3); +#define __riscv_sf_vsettk_e16w1(atk) __riscv_sf_vsettk(atk, 1, 1); +#define __riscv_sf_vsettk_e16w2(atk) __riscv_sf_vsettk(atk, 1, 2); +#define __riscv_sf_vsettk_e16w4(atk) __riscv_sf_vsettk(atk, 1, 3); +#define __riscv_sf_vsettk_e32w1(atk) __riscv_sf_vsettk(atk, 2, 1); +#define __riscv_sf_vsettk_e32w2(atk) __riscv_sf_vsettk(atk, 2, 2); +#define __riscv_sf_vtzero_t_e8w1(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 3, 1); +#define __riscv_sf_vtzero_t_e8w2(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 3, 2); +#define __riscv_sf_vtzero_t_e8w4(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 3, 4); +#define __riscv_sf_vtzero_t_e16w1(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 4, 1); +#define __riscv_sf_vtzero_t_e16w2(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 4, 2); +#define __riscv_sf_vtzero_t_e16w4(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 4, 4); +#define __riscv_sf_vtzero_t_e32w1(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 5, 1); +#define __riscv_sf_vtzero_t_e32w2(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 5, 2); 
+#if __riscv_v_elen >= 64 +#define __riscv_sf_vsettnt_e64w1(atn) __riscv_sf_vsettnt(atn, 3, 1); +#define __riscv_sf_vsettm_e64w1(atm) __riscv_sf_vsettm(atm, 3, 1); +#define __riscv_sf_vsettn_e64w1(atn) __riscv_sf_vsettn(atn, 3, 1); +#define __riscv_sf_vsettk_e64w1(atk) __riscv_sf_vsettk(atk, 3, 1); +#define __riscv_sf_vtzero_t_e64w1(tile, atm, atn) \ + __riscv_sf_vtzero_t(tile, atm, atn, 6, 1); +#endif #endif //_SIFIVE_VECTOR_H_ diff --git a/lib/include/sm4evexintrin.h b/lib/include/sm4evexintrin.h index f6ae0037ba..9c15d1fca9 100644 --- a/lib/include/sm4evexintrin.h +++ b/lib/include/sm4evexintrin.h @@ -14,8 +14,8 @@ #define __SM4EVEXINTRIN_H #define __DEFAULT_FN_ATTRS512 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("sm4,avx10.2-512"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("sm4,avx10.2"), \ + __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sm4key4_epi32(__m512i __A, __m512i __B) { diff --git a/lib/include/smmintrin.h b/lib/include/smmintrin.h index bc6fe4c801..511a135375 100644 --- a/lib/include/smmintrin.h +++ b/lib/include/smmintrin.h @@ -17,14 +17,14 @@ #include <tmmintrin.h> /* Define the default attributes for the functions in this file. */ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("sse4.1,no-evex512"), __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \ __min_vector_width__(128))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif /* SSE4 Rounding macros. */ @@ -439,9 +439,8 @@ /// position in the result. When a mask bit is 1, the corresponding 64-bit /// element in operand \a __V2 is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1, - __m128d __V2, - __m128d __M) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) { return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2, (__v2df)__M); } @@ -466,9 +465,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1, /// position in the result. When a mask bit is 1, the corresponding 32-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1, - __m128 __V2, - __m128 __M) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) { return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2, (__v4sf)__M); } @@ -493,9 +491,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1, /// position in the result. When a mask bit is 1, the corresponding 8-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [16 x i8] containing the copied values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1, - __m128i __V2, - __m128i __M) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_blendv_epi8(__m128i __V1, __m128i __V2, __m128i __M) { return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__V1, (__v16qi)__V2, (__v16qi)__M); } @@ -542,8 +539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1, /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the products of both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mullo_epi32(__m128i __V1, __m128i __V2) { return (__m128i)((__v4su)__V1 * (__v4su)__V2); } @@ -561,8 +558,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [2 x i64] containing the products of both /// operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mul_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2); } @@ -669,8 +666,8 @@ _mm_stream_load_si128(const void *__V) { /// \param __V2 /// A 128-bit vector of [16 x i8] /// \returns A 128-bit vector of [16 x i8] containing the lesser values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_min_epi8(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2); } @@ -687,8 +684,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, /// \param __V2 /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the greater values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_max_epi8(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2); } @@ -705,8 +702,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, /// \param __V2 /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit vector of [8 x u16] containing the lesser values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_min_epu16(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2); } @@ -723,8 +720,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, /// \param __V2 /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit vector of [8 x u16] containing the greater values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_max_epu16(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2); } @@ -741,8 +738,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the lesser values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_min_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2); } @@ -759,8 +756,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the greater values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_max_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2); } @@ -777,8 +774,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, /// \param __V2 /// A 128-bit vector of [4 x u32]. /// \returns A 128-bit vector of [4 x u32] containing the lesser values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_min_epu32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2); } @@ -795,8 +792,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, /// \param __V2 /// A 128-bit vector of [4 x u32]. /// \returns A 128-bit vector of [4 x u32] containing the greater values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_max_epu32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2); } @@ -1096,8 +1093,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testz_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } @@ -1113,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all ones; FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } @@ -1131,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, /// A 128-bit integer vector selecting which bits to test in operand \a __M. 
/// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testnzc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); } @@ -1205,8 +1202,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 == (__v2di)__V2); } @@ -1224,7 +1221,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// sign-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( @@ -1246,7 +1244,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( @@ -1266,7 +1265,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( @@ -1286,7 +1286,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); } @@ -1304,7 +1305,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); } @@ -1322,7 +1324,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); } @@ -1341,7 +1344,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// zero-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu8_epi16(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), @@ -1361,7 +1365,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu8_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } @@ -1379,7 +1384,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu8_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } @@ -1397,7 +1403,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } @@ -1415,7 +1422,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); } @@ -1433,7 +1441,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepu32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); } @@ -1457,8 +1466,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { /// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_packus_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); } @@ -1515,7 +1524,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, /// \returns A 128-bit value where bits [15:0] contain the minimum value found /// in parameter \a __V, bits [18:16] contain the index of the minimum value /// and the remaining bits are set to 0. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_minpos_epu16(__m128i __V) { return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V); } @@ -1525,9 +1535,16 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { so we'll do the same. */ #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /* These specify the type of data that we're comparing. */ #define _SIDD_UBYTE_OPS 0x00 #define _SIDD_UWORD_OPS 0x01 @@ -2320,12 +2337,13 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 > (__v2di)__V2); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #include <popcntintrin.h> diff --git a/lib/include/spirvintrin.h b/lib/include/spirvintrin.h new file mode 100644 index 0000000000..2a10a47ade --- /dev/null +++ b/lib/include/spirvintrin.h @@ -0,0 +1,194 @@ +//===-- spirvintrin.h - SPIR-V intrinsic functions ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __SPIRVINTRIN_H +#define __SPIRVINTRIN_H + +#ifndef __SPIRV__ +#error "This file is intended for SPIR-V targets or offloading to SPIR-V" +#endif + +#ifndef __GPUINTRIN_H +#error "Never use <spirvintrin.h> directly; include <gpuintrin.h> instead" +#endif + +_Pragma("omp begin declare target device_type(nohost)"); +_Pragma("omp begin declare variant match(device = {arch(spirv64)})"); + +// Type aliases to the address spaces used by the SPIR-V backend. +#define __gpu_private __attribute__((address_space(0))) +#define __gpu_constant __attribute__((address_space(2))) +#define __gpu_local __attribute__((address_space(3))) +#define __gpu_global __attribute__((address_space(1))) +#define __gpu_generic __attribute__((address_space(4))) + +// Attribute to declare a function as a kernel. +#define __gpu_kernel __attribute__((device_kernel, visibility("protected"))) + +// Returns the number of workgroups in the 'x' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { + return __builtin_spirv_num_workgroups(0); +} + +// Returns the number of workgroups in the 'y' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) { + return __builtin_spirv_num_workgroups(1); +} + +// Returns the number of workgroups in the 'z' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) { + return __builtin_spirv_num_workgroups(2); +} + +// Returns the 'x' dimension of the current workgroup's id. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) { + return __builtin_spirv_workgroup_id(0); +} + +// Returns the 'y' dimension of the current workgroup's id. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) { + return __builtin_spirv_workgroup_id(1); +} + +// Returns the 'z' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) { + return __builtin_spirv_workgroup_id(2); +} + +// Returns the number of workitems in the 'x' dimension. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) { + return __builtin_spirv_workgroup_size(0); +} + +// Returns the number of workitems in the 'y' dimension. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) { + return __builtin_spirv_workgroup_size(1); +} + +// Returns the number of workitems in the 'z' dimension. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) { + return __builtin_spirv_workgroup_size(2); +} + +// Returns the 'x' dimension id of the workitem in the current workgroup. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) { + return __builtin_spirv_local_invocation_id(0); +} + +// Returns the 'y' dimension id of the workitem in the current workgroup. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) { + return __builtin_spirv_local_invocation_id(1); +} + +// Returns the 'z' dimension id of the workitem in the current workgroup. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) { + return __builtin_spirv_local_invocation_id(2); +} + +// Returns the size of a wavefront, either 32 or 64 depending on hardware +// and compilation options. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) { + return __builtin_spirv_subgroup_size(); +} + +// Returns the id of the thread inside of a wavefront executing together. +_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) { + return __builtin_spirv_subgroup_local_invocation_id(); +} + +// Returns the bit-mask of active threads in the current wavefront. This +// implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) { + uint32_t [[clang::ext_vector_type(4)]] __mask = + __builtin_spirv_subgroup_ballot(1); + return __builtin_bit_cast(uint64_t, + __builtin_shufflevector(__mask, __mask, 0, 1)); +} + +// Copies the value from the first active thread in the wavefront to the rest. +_DEFAULT_FN_ATTRS static __inline__ uint32_t +__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) { + return __builtin_spirv_subgroup_shuffle(__x, + __builtin_ctzg(__gpu_lane_mask())); +} + +// Returns a bitmask of threads in the current wavefront for which \p __x is true. This +// implementation is incorrect if the target uses more than 64 lanes. +_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask, + bool __x) { + // The lane_mask & gives the nvptx semantics when lane_mask is a subset of + // the active threads. + uint32_t [[clang::ext_vector_type(4)]] __mask = + __builtin_spirv_subgroup_ballot(__x); + return __lane_mask & __builtin_bit_cast(uint64_t, __builtin_shufflevector( + __mask, __mask, 0, 1)); +} + +// Waits for all the threads in the block to converge and issues a fence. +_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_threads(void) { + __builtin_spirv_group_barrier(); +} + +// Wait for all threads in the wavefront to converge, this is a noop on SPIR-V. +_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) { +} + +// Shuffles the lanes inside the wavefront according to the given index. +_DEFAULT_FN_ATTRS static __inline__ uint32_t +__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, + uint32_t __width) { + uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1)); + return __builtin_spirv_subgroup_shuffle(__x, __lane); +} + +// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { + return __gpu_match_any_u32_impl(__lane_mask, __x); +} + +// Returns a bitmask marking all lanes that have the same value of __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) { + return __gpu_match_any_u64_impl(__lane_mask, __x); +} + +// Returns the current lane mask if every lane contains __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) { + return __gpu_match_all_u32_impl(__lane_mask, __x); +} + +// Returns the current lane mask if every lane contains __x. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) { + return __gpu_match_all_u64_impl(__lane_mask, __x); +} + +// SPIR-V does not expose this, always return false. +_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) { + return 0; +} + +// SPIR-V does not expose this, always return false. +_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) { + return 0; +} + +// SPIR-V only supports 'OpTerminateInvocation' in fragment shaders. +_DEFAULT_FN_ATTRS [[noreturn]] static __inline__ void __gpu_exit(void) { + __builtin_trap(); +} + +// This is a no-op as SPIR-V does not support it. +_DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {} + +_Pragma("omp end declare variant"); +_Pragma("omp end declare target"); + +#endif // __SPIRVINTRIN_H diff --git a/lib/include/stddefer.h b/lib/include/stddefer.h new file mode 100644 index 0000000000..162876ddfa --- /dev/null +++ b/lib/include/stddefer.h @@ -0,0 +1,19 @@ +/*===---- stddefer.h - Standard header for 'defer' -------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_STDDEFER_H +#define __CLANG_STDDEFER_H + +/* Provide 'defer' if '_Defer' is supported. */ +#ifdef __STDC_DEFER_TS25755__ +#define __STDC_VERSION_STDDEFER_H__ 202602L +#define defer _Defer +#endif + +#endif /* __CLANG_STDDEFER_H */ diff --git a/lib/include/tmmintrin.h b/lib/include/tmmintrin.h index 371cc82e3d..cb4b36ea73 100644 --- a/lib/include/tmmintrin.h +++ b/lib/include/tmmintrin.h @@ -17,21 +17,21 @@ #include <pmmintrin.h> /* Define the default attributes for the functions in this file. */ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("ssse3,no-evex512"), __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), \ __min_vector_width__(128))) -#endif #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) -#define __anyext128(x) \ +#define __zext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ - 1, -1, -1) + 1, 2, 3) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer @@ -45,9 +45,7 @@ /// A 64-bit vector of [8 x i8]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_abs_pi8(__m64 __a) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a) { return (__m64)__builtin_elementwise_abs((__v8qs)__a); } @@ -63,10 +61,9 @@ _mm_abs_pi8(__m64 __a) /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_abs_epi8(__m128i __a) -{ - return (__m128i)__builtin_elementwise_abs((__v16qs)__a); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_abs_epi8(__m128i __a) { + return (__m128i)__builtin_elementwise_abs((__v16qs)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -81,10 +78,8 @@ _mm_abs_epi8(__m128i __a) /// A 64-bit vector of [4 x i16]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_abs_pi16(__m64 __a) -{ - return (__m64)__builtin_elementwise_abs((__v4hi)__a); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a) { + return (__m64)__builtin_elementwise_abs((__v4hi)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -99,10 +94,9 @@ _mm_abs_pi16(__m64 __a) /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_abs_epi16(__m128i __a) -{ - return (__m128i)__builtin_elementwise_abs((__v8hi)__a); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_abs_epi16(__m128i __a) { + return (__m128i)__builtin_elementwise_abs((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -117,10 +111,8 @@ _mm_abs_epi16(__m128i __a) /// A 64-bit vector of [2 x i32]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_abs_pi32(__m64 __a) -{ - return (__m64)__builtin_elementwise_abs((__v2si)__a); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a) { + return (__m64)__builtin_elementwise_abs((__v2si)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -135,10 +127,9 @@ _mm_abs_pi32(__m64 __a) /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_abs_epi32(__m128i __a) -{ - return (__m128i)__builtin_elementwise_abs((__v4si)__a); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_abs_epi32(__m128i __a) { + return (__m128i)__builtin_elementwise_abs((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and @@ -184,11 +175,12 @@ _mm_abs_epi32(__m128i __a) /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 64-bit integer vector containing the concatenated right-shifted /// value. -#define _mm_alignr_pi8(a, b, n) \ - ((__m64)__builtin_shufflevector( \ - __builtin_ia32_psrldqi128_byteshift( \ - __builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), \ - (n)), __extension__ (__v2di){}, 0)) +#define _mm_alignr_pi8(a, b, n) \ + ((__m64)__builtin_shufflevector( \ + (__v2di)__builtin_ia32_psrldqi128_byteshift( \ + (__v16qi)__builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), \ + (n)), \ + __extension__(__v2di){}, 0)) /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. @@ -207,10 +199,9 @@ _mm_abs_epi32(__m128i __a) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -230,10 +221,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -253,11 +243,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -277,11 +266,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -304,10 +292,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadds_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadds_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -330,11 +317,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadds_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -354,10 +340,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -377,10 +362,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -400,11 +384,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -424,11 +407,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -451,10 +433,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -477,11 +458,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -512,10 +492,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b) /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maddubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_maddubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -542,11 +521,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b) /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_maddubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_maddubs_pi16(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__zext128(__a), + (__v16qi)__zext128(__b))); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -563,10 +541,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// A 128-bit vector of [8 x i16] containing one of the source operands. 
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhrs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -583,11 +560,10 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_mulhrs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhrs_pi16(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), + (__v8hi)__zext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the @@ -610,10 +586,9 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b) /// Bits [6:4] Reserved. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 128-bit integer vector containing the copied or cleared values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_shuffle_epi8(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_shuffle_epi8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } /// Copies the 8-bit integers from a 64-bit integer vector to the @@ -635,13 +610,12 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) /// destination. \n /// Bits [2:0] select the source byte to be copied. 
/// \returns A 64-bit integer vector containing the copied or cleared values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_shuffle_pi8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pshufb128( - (__v16qi)__builtin_shufflevector( - (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_shuffle_pi8(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pshufb128( + (__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){}, + 0, 1, 0, 1), + (__v16qi)__zext128(__b))); } /// For each 8-bit integer in the first source operand, perform one of @@ -664,10 +638,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi8(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } /// For each 16-bit integer in the first source operand, perform one of @@ -690,10 +663,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b) /// A 128-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } /// For each 32-bit integer in the first source operand, perform one of @@ -716,10 +688,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b) /// A 128-bit integer vector containing control doublewords corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } /// For each 8-bit integer in the first source operand, perform one of @@ -742,11 +713,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b) /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a), + (__v16qi)__zext128(__b))); } /// For each 16-bit integer in the first source operand, perform one of @@ -769,11 +739,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a, + __m64 __b) { + return __trunc64( + __builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b))); } /// For each 32-bit integer in the first source operand, perform one of @@ -796,15 +765,15 @@ _mm_sign_pi16(__m64 __a, __m64 __b) /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a), - (__v4si)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a, + __m64 __b) { + return __trunc64( + __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b))); } -#undef __anyext128 +#undef __zext128 #undef __trunc64 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __TMMINTRIN_H */ diff --git a/lib/include/vaesintrin.h b/lib/include/vaesintrin.h index d7c162f5c0..5194ca6c50 100644 --- a/lib/include/vaesintrin.h +++ b/lib/include/vaesintrin.h @@ -19,8 +19,7 @@ /* Default attributes for ZMM forms. 
*/ #define __DEFAULT_FN_ATTRS_F \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512f,evex512,vaes"), \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), \ __min_vector_width__(512))) static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/lib/include/xmmintrin.h b/lib/include/xmmintrin.h index 6a64369773..ab0f0c1690 100644 --- a/lib/include/xmmintrin.h +++ b/lib/include/xmmintrin.h @@ -16,7 +16,6 @@ #include -typedef int __v4si __attribute__((__vector_size__(16))); typedef float __v4sf __attribute__((__vector_size__(16))); typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); @@ -24,6 +23,8 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); +typedef unsigned short __v8hu __attribute__((__vector_size__(16))); +typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ @@ -32,21 +33,12 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16))); #endif /* Define the default attributes for the functions in this file. 
*/ -#if defined(__EVEX512__) && !defined(__AVX10_1_512__) -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \ - __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS_SSE2 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("sse2,no-evex512"), __min_vector_width__(128))) -#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_SSE2 \ __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ __min_vector_width__(128))) -#endif #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr @@ -239,10 +231,9 @@ _mm_div_ps(__m128 __a, __m128 __b) { /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the square root of the /// value in the low-order bits of the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sqrt_ss(__m128 __a) -{ - return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { + __a[0] = __builtin_elementwise_sqrt(__a[0]); + return __a; } /// Calculates the square roots of the values stored in a 128-bit vector @@ -256,10 +247,8 @@ _mm_sqrt_ss(__m128 __a) /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sqrt_ps(__m128 __a) -{ - return __builtin_ia32_sqrtps((__v4sf)__a); +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) { + return __builtin_elementwise_sqrt(__a); } /// Calculates the approximate reciprocal of the value stored in the @@ -352,9 +341,7 @@ _mm_rsqrt_ps(__m128 __a) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// minimum value between both operands. 
The upper 96 bits are copied from /// the upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_min_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } @@ -373,9 +360,8 @@ _mm_min_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the minimum values /// between both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_min_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b); } @@ -398,9 +384,7 @@ _mm_min_ps(__m128 __a, __m128 __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// maximum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_max_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } @@ -419,9 +403,8 @@ _mm_max_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the maximum values /// between both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_max_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b); } @@ -1688,7 +1671,7 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) { /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtpi32_ps(__m128 __a, __m64 __b) { return (__m128)__builtin_shufflevector( @@ -1714,7 +1697,7 @@ _mm_cvtpi32_ps(__m128 __a, __m64 __b) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value from the second operand. The upper 64 bits are copied /// from the upper 64 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvt_pi2ps(__m128 __a, __m64 __b) { return _mm_cvtpi32_ps(__a, __b); @@ -2353,9 +2336,8 @@ void _mm_sfence(void); /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_max_pi16(__m64 __a, __m64 __b) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_max_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b); } @@ -2372,9 +2354,8 @@ _mm_max_pi16(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_max_pu8(__m64 __a, __m64 __b) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_max_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_elementwise_max((__v8qu)__a, (__v8qu)__b); } @@ -2391,9 +2372,8 @@ _mm_max_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_min_pi16(__m64 __a, __m64 __b) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_min_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b); } @@ -2410,9 +2390,8 @@ _mm_min_pi16(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_min_pu8(__m64 __a, __m64 __b) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_min_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_elementwise_min((__v8qu)__a, (__v8qu)__b); } @@ -2428,9 +2407,8 @@ _mm_min_pu8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 -_mm_movemask_pi8(__m64 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a)); } @@ -2447,11 +2425,11 @@ _mm_movemask_pi8(__m64 __a) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mulhi_pu16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pmulhuw128((__v8hu)__zext128(__a), + (__v8hu)__zext128(__b))); } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the @@ -2530,8 +2508,8 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) // If there's a risk of spurious trap due to a 128-bit write, back up the // pointer by 8 bytes and shift values in registers to match. __p -= 8; - __d128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__d128, 8); - __n128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__n128, 8); + __d128 = (__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)__d128, 8); + __n128 = (__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)__n128, 8); } __builtin_ia32_maskmovdqu((__v16qi)__d128, (__v16qi)__n128, __p); @@ -2550,11 +2528,10 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_avg_pu8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_avg_pu8(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pavgb128((__v16qu)__zext128(__a), + (__v16qu)__zext128(__b))); } /// Computes the rounded averages of the packed unsigned 16-bit integer @@ -2570,11 +2547,10 @@ _mm_avg_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_avg_pu16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_avg_pu16(__m64 __a, __m64 __b) { + return __trunc64( + __builtin_ia32_pavgw128((__v8hu)__zext128(__a), (__v8hu)__zext128(__b))); } /// Subtracts the corresponding 8-bit unsigned integer values of the two @@ -2873,7 +2849,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b) { /// from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtpi16_ps(__m64 __a) { return __builtin_convertvector((__v4hi)__a, __v4sf); @@ -2891,7 +2867,7 @@ _mm_cvtpi16_ps(__m64 __a) /// destination are copied from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtpu16_ps(__m64 __a) { return __builtin_convertvector((__v4hu)__a, __v4sf); @@ -2909,7 +2885,7 @@ _mm_cvtpu16_ps(__m64 __a) /// from the corresponding lower 4 elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtpi8_ps(__m64 __a) { return __builtin_convertvector( @@ -2930,7 +2906,7 @@ _mm_cvtpi8_ps(__m64 __a) /// operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the source operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtpu8_ps(__m64 __a) { return __builtin_convertvector( @@ -2954,7 +2930,7 @@ _mm_cvtpu8_ps(__m64 __a) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// copied and converted values from the first operand. The upper 64 bits /// contain the copied and converted values from the second operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) { return __builtin_convertvector( @@ -3029,9 +3005,7 @@ _mm_cvtps_pi8(__m128 __a) /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each /// single-precision floating-point element of the parameter. Bits [31:4] are /// set to zero. -static __inline__ int __DEFAULT_FN_ATTRS -_mm_movemask_ps(__m128 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps((__v4sf)__a); } diff --git a/lib/include/xopintrin.h b/lib/include/xopintrin.h index 976cdf4902..aba632f941 100644 --- a/lib/include/xopintrin.h +++ b/lib/include/xopintrin.h @@ -20,6 +20,14 @@ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) { @@ -182,13 +190,13 @@ _mm_hsubq_epi32(__m128i __A) return 
(__m128i)__builtin_ia32_vphsubdq((__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); @@ -200,28 +208,28 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi8(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); + return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi16(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); + return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi32(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_rot_epi64(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B); } #define _mm_roti_epi8(A, N) \ @@ -766,5 +774,7 @@ _mm256_frcz_pd(__m256d __A) #undef __DEFAULT_FN_ATTRS #undef 
__DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __XOPINTRIN_H */ diff --git a/lib/libcxx/include/__algorithm/all_of.h b/lib/libcxx/include/__algorithm/all_of.h index 6acc117fc4..9bdb20a0d7 100644 --- a/lib/libcxx/include/__algorithm/all_of.h +++ b/lib/libcxx/include/__algorithm/all_of.h @@ -10,24 +10,28 @@ #ifndef _LIBCPP___ALGORITHM_ALL_OF_H #define _LIBCPP___ALGORITHM_ALL_OF_H +#include <__algorithm/any_of.h> #include <__config> #include <__functional/identity.h> #include <__type_traits/invoke.h> +#include <__utility/forward.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __all_of(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { - for (; __first != __last; ++__first) { - if (!std::__invoke(__pred, std::__invoke(__proj, *__first))) - return false; - } - return true; + using _Ref = decltype(std::__invoke(__proj, *__first)); + auto __negated_pred = [&__pred](_Ref __arg) -> bool { return !std::__invoke(__pred, std::forward<_Ref>(__arg)); }; + return !std::__any_of(std::move(__first), std::move(__last), __negated_pred, __proj); } template @@ -39,4 +43,6 @@ all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_ALL_OF_H diff --git a/lib/libcxx/include/__algorithm/comp.h b/lib/libcxx/include/__algorithm/comp.h index ab3c598418..38e2fb9f5e 100644 --- a/lib/libcxx/include/__algorithm/comp.h +++ b/lib/libcxx/include/__algorithm/comp.h @@ -11,6 +11,7 @@ #include <__config> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_generic_transparent_comparator.h> #include <__type_traits/is_integral.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -48,6 +49,9 @@ inline const bool 
__desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; template inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; +template <> +inline const bool __is_generic_transparent_comparator_v<__less<> > = true; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_COMP_H diff --git a/lib/libcxx/include/__algorithm/copy.h b/lib/libcxx/include/__algorithm/copy.h index ea98031df1..344a53e516 100644 --- a/lib/libcxx/include/__algorithm/copy.h +++ b/lib/libcxx/include/__algorithm/copy.h @@ -12,11 +12,10 @@ #include <__algorithm/copy_move_common.h> #include <__algorithm/for_each_segment.h> #include <__algorithm/min.h> +#include <__algorithm/specialized_algorithms.h> #include <__config> -#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> -#include <__memory/pointer_traits.h> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> #include <__utility/move.h> @@ -38,124 +37,14 @@ copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result); template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter); -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_); - 
__storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __nw = __n / __bits_per_word; - std::copy(std::__to_address(__first.__seg_), - std::__to_address(__first.__seg_ + __nw), - std::__to_address(__result.__seg_)); - __n -= __nw * __bits_per_word; - __result.__seg_ += __nw; - // do last word - if (__n > 0) { - __first.__seg_ += __nw; - __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(__n); - } - } - return __result; -} - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_); - __storage_type __b = *__first.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); - __m = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - *__result.__seg_ |= __b << 
(__result.__ctz_ - __first.__ctz_); - else - *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) { - __m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_); - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { - __storage_type __b = *__first.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - ++__result.__seg_; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b >> __clz_r; - } - // do last word - if (__n > 0) { - __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - __storage_type __dn = std::min(__n, static_cast(__clz_r)); - __m = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __dn; - __result.__ctz_ = static_cast(__n); - } - } - } - return __result; -} - struct __copy_impl { - template + template , + __single_iterator<_OutIter> >::__has_algorithm, + int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { while 
(__first != __last) { @@ -167,37 +56,39 @@ struct __copy_impl { return std::make_pair(std::move(__first), std::move(__result)); } - template - struct _CopySegment { - using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InIter>; + template , + __single_iterator<_OutIter> >::__has_algorithm, + int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static pair<_InIter, _OutIter> + operator()(_InIter __first, _Sent __last, _OutIter __result) { + return __specialized_algorithm<_Algorithm::__copy, __iterator_pair<_InIter, _Sent>, __single_iterator<_OutIter> >()( + std::move(__first), std::move(__last), std::move(__result)); + } - _OutIter& __result_; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit _CopySegment(_OutIter& __result) - : __result_(__result) {} - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void - operator()(typename _Traits::__local_iterator __lfirst, typename _Traits::__local_iterator __llast) { - __result_ = std::__copy(__lfirst, __llast, std::move(__result_)).second; - } - }; - - template ::value, int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { - std::__for_each_segment(__first, __last, _CopySegment<_InIter, _OutIter>(__result)); + using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator; + std::__for_each_segment(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) { + __result = std::__copy(std::move(__lfirst), std::move(__llast), std::move(__result)).second; + }); return std::make_pair(__last, std::move(__result)); } template ::value && - !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value, + !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter 
__last, _OutIter __result) const { using _Traits = __segmented_iterator_traits<_OutIter>; - using _DiffT = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type; + using _DiffT = + typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type; if (__first == __last) return std::make_pair(std::move(__first), std::move(__result)); @@ -217,16 +108,6 @@ struct __copy_impl { } } - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > - operator()(__bit_iterator<_Cp, _IsConst> __first, - __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) const { - if (__first.__ctz_ == __result.__ctz_) - return std::make_pair(__last, std::__copy_aligned(__first, __last, __result)); - return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result)); - } - // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> diff --git a/lib/libcxx/include/__algorithm/copy_backward.h b/lib/libcxx/include/__algorithm/copy_backward.h index 9f890645a4..8758d2c9e7 100644 --- a/lib/libcxx/include/__algorithm/copy_backward.h +++ b/lib/libcxx/include/__algorithm/copy_backward.h @@ -11,6 +11,7 @@ #include <__algorithm/copy_move_common.h> #include <__algorithm/copy_n.h> +#include <__algorithm/for_each_segment.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> @@ -170,37 +171,20 @@ struct __copy_backward_impl { return std::make_pair(std::move(__original_last_iter), std::move(__result)); } - template ::value, int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { - using _Traits = __segmented_iterator_traits<_InIter>; - auto __sfirst = _Traits::__segment(__first); - auto __slast = _Traits::__segment(__last); - if (__sfirst == __slast) { - auto __iters = - std::__copy_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__local(__last), std::move(__result)); - return std::make_pair(__last, __iters.second); - } - - __result = - std::__copy_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__local(__last), std::move(__result)) - .second; - --__slast; - while (__sfirst != __slast) { - __result = - std::__copy_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__end(__slast), std::move(__result)) - .second; - --__slast; - } - __result = std::__copy_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__end(__slast), std::move(__result)) - .second; + using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator; + std::__for_each_segment_backward(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) { + __result = 
std::__copy_backward<_AlgPolicy>(std::move(__lfirst), std::move(__llast), std::move(__result)).second; + }); return std::make_pair(__last, std::move(__result)); } template ::value && - !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value, + !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { @@ -214,7 +198,8 @@ struct __copy_backward_impl { auto __local_last = _Traits::__local(__result); while (true) { - using _DiffT = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type; + using _DiffT = + typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type; auto __local_first = _Traits::__begin(__segment_iterator); auto __size = std::min<_DiffT>(__local_last - __local_first, __last - __first); diff --git a/lib/libcxx/include/__algorithm/copy_n.h b/lib/libcxx/include/__algorithm/copy_n.h index f93f39203a..56fb44811f 100644 --- a/lib/libcxx/include/__algorithm/copy_n.h +++ b/lib/libcxx/include/__algorithm/copy_n.h @@ -10,31 +10,63 @@ #define _LIBCPP___ALGORITHM_COPY_N_H #include <__algorithm/copy.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> #include <__type_traits/enable_if.h> #include <__utility/convert_to_integral.h> +#include <__utility/move.h> +#include <__utility/pair.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter> +__copy_n(_InIter __first, typename _IterOps<_AlgPolicy>::template __difference_type<_InIter> __n, _OutIter __result) { + return std::__copy(__first, __first + __n, std::move(__result)); +} + +template 
::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter> +__copy_n(_InIter __first, typename _IterOps<_AlgPolicy>::template __difference_type<_InIter> __n, _OutIter __result) { + while (__n != 0) { + *__result = *__first; + ++__first; + ++__result; + --__n; + } + return std::make_pair(std::move(__first), std::move(__result)); +} + +// The InputIterator case is handled specially here because it's been written in a way to avoid incrementing __first +// if not absolutely required. This was done to allow its use with istream_iterator and we want to avoid breaking +// people, at least currently. +// See https://github.com/llvm/llvm-project/commit/99847d2bf132854fffa019bab19818768102ccad template ::value && - !__has_random_access_iterator_category<_InputIterator>::value, - int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator -copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) { - typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; - _IntegralSize __n = __orig_n; - if (__n > 0) { + __enable_if_t<__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +copy_n(_InputIterator __first, _Size __n, _OutputIterator __result) { + using _IntegralSize = decltype(std::__convert_to_integral(__n)); + _IntegralSize __converted = __n; + if (__converted > 0) { *__result = *__first; ++__result; - for (--__n; __n > 0; --__n) { + for (--__converted; __converted > 0; --__converted) { ++__first; *__result = *__first; ++__result; @@ -46,15 +78,17 @@ copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) { template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator -copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) { - typedef typename iterator_traits<_InputIterator>::difference_type difference_type; - typedef 
decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; - _IntegralSize __n = __orig_n; - return std::copy(__first, __first + difference_type(__n), __result); + __enable_if_t::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +copy_n(_InputIterator __first, _Size __n, _OutputIterator __result) { + using _IntegralSize = decltype(std::__convert_to_integral(__n)); + _IntegralSize __converted = __n; + return std::__copy_n<_ClassicAlgPolicy>(__first, __iterator_difference_type<_InputIterator>(__converted), __result) + .second; } _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_COPY_N_H diff --git a/lib/libcxx/include/__algorithm/count.h b/lib/libcxx/include/__algorithm/count.h index 0cbe9b6e61..8529d110a3 100644 --- a/lib/libcxx/include/__algorithm/count.h +++ b/lib/libcxx/include/__algorithm/count.h @@ -72,7 +72,7 @@ __count_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_t } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<__bit_iterator<_Cp, _IsConst> > +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iterator_difference_type<__bit_iterator<_Cp, _IsConst> > __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) { if (__value) return std::__count_bool( @@ -82,7 +82,7 @@ __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __l } template -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iterator_difference_type<_InputIterator> count(_InputIterator __first, _InputIterator __last, const _Tp& __value) { __identity __proj; return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj); diff --git a/lib/libcxx/include/__algorithm/equal.h b/lib/libcxx/include/__algorithm/equal.h index 
5a8c9504ed..957cc29759 100644 --- a/lib/libcxx/include/__algorithm/equal.h +++ b/lib/libcxx/include/__algorithm/equal.h @@ -160,22 +160,28 @@ template , int> = 0> + class _Proj1, + class _Proj2, + __enable_if_t<__is_identity<_Proj1>::value && __is_identity<_Proj2>::value && + __desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, + int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( __bit_iterator<_Cp, _IsConst1> __first1, __bit_iterator<_Cp, _IsConst1> __last1, __bit_iterator<_Cp, _IsConst2> __first2, - _BinaryPredicate) { + _BinaryPredicate, + _Proj1&, + _Proj2&) { if (__first1.__ctz_ == __first2.__ctz_) return std::__equal_aligned(__first1, __last1, __first2); return std::__equal_unaligned(__first1, __last1, __first2); } -template +template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( - _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) { + _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { for (; __first1 != __last1; ++__first1, (void)++__first2) - if (!__pred(*__first1, *__first2)) + if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) return false; return true; } @@ -183,19 +189,23 @@ template template && !is_volatile<_Tp>::value && - !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, + class _Proj1, + class _Proj2, + __enable_if_t<__is_identity<_Proj1>::value && __is_identity<_Proj2>::value && + __desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value && + !is_volatile<_Up>::value && __is_trivially_equality_comparable_v<_Tp, _Up>, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -__equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&) { +__equal_iter_impl(_Tp* 
__first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&, _Proj1&, _Proj2&) { return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) { + __identity __proj; return std::__equal_iter_impl( - std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred); + std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj); } template @@ -206,52 +216,28 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first #if _LIBCPP_STD_VER >= 14 -template +template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __comp, _Proj1& __proj1, _Proj2& __proj2) { - while (__first1 != __last1 && __first2 != __last2) { - if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) - return false; - ++__first1; - ++__first2; + if constexpr (__known_equal_length) { + return std::__equal_iter_impl( + std::move(__first1), std::move(__last1), std::move(__first2), __comp, __proj1, __proj2); + } else { + while (__first1 != __last1 && __first2 != __last2) { + if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) + return false; + ++__first1; + ++__first2; + } + return __first1 == __last1 && __first2 == __last2; } - return __first1 == __last1 && __first2 == __last2; -} - -template && __is_identity<_Proj1>::value && - __is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && - __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, - int> = 0> -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool 
-__equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, _Proj2&) { - return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); -} - -template && __is_identity<_Proj1>::value && - __is_identity<_Proj2>::value, - int> = 0> -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( - __bit_iterator<_Cp, _IsConst1> __first1, - __bit_iterator<_Cp, _IsConst1> __last1, - __bit_iterator<_Cp, _IsConst2> __first2, - __bit_iterator<_Cp, _IsConst2>, - _Pred&, - _Proj1&, - _Proj2&) { - if (__first1.__ctz_ == __first2.__ctz_) - return std::__equal_aligned(__first1, __last1, __first2); - return std::__equal_unaligned(__first1, __last1, __first2); } template @@ -261,13 +247,15 @@ equal(_InputIterator1 __first1, _InputIterator2 __first2, _InputIterator2 __last2, _BinaryPredicate __pred) { - if constexpr (__has_random_access_iterator_category<_InputIterator1>::value && - __has_random_access_iterator_category<_InputIterator2>::value) { + static constexpr bool __both_random_access = + __has_random_access_iterator_category<_InputIterator1>::value && + __has_random_access_iterator_category<_InputIterator2>::value; + if constexpr (__both_random_access) { if (std::distance(__first1, __last1) != std::distance(__first2, __last2)) return false; } __identity __proj; - return std::__equal_impl( + return std::__equal_impl<__both_random_access>( std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), diff --git a/lib/libcxx/include/__algorithm/fill.h b/lib/libcxx/include/__algorithm/fill.h index 1ce3eadb01..37732cc22a 100644 --- a/lib/libcxx/include/__algorithm/fill.h +++ b/lib/libcxx/include/__algorithm/fill.h @@ -10,8 +10,12 @@ #define _LIBCPP___ALGORITHM_FILL_H #include <__algorithm/fill_n.h> +#include <__algorithm/for_each_segment.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__iterator/segmented_iterator.h> +#include 
<__type_traits/enable_if.h> +#include <__type_traits/is_same.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,23 +25,37 @@ _LIBCPP_BEGIN_NAMESPACE_STD // fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset. -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, forward_iterator_tag) { +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +__fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __value) { +#ifndef _LIBCPP_CXX03_LANG + if constexpr (is_same<_ForwardIterator, _Sentinel>::value && __is_segmented_iterator_v<_ForwardIterator>) { + using __local_iterator_t = typename __segmented_iterator_traits<_ForwardIterator>::__local_iterator; + std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { + std::__fill(__lfirst, __llast, __value); + }); + return __last; + } +#endif for (; __first != __last; ++__first) *__first = __value; + return __first; } -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, random_access_iterator_tag) { - std::fill_n(__first, __last - __first, __value); +template ::value && + !__is_segmented_iterator_v<_RandomAccessIterator>, + int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator +__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) { + return std::__fill_n(__first, __last - __first, __value); } template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - std::__fill(__first, __last, __value, typename iterator_traits<_ForwardIterator>::iterator_category()); + std::__fill(__first, 
__last, __value); } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/fill_n.h b/lib/libcxx/include/__algorithm/fill_n.h index 0da78e1f38..3d06ea4f08 100644 --- a/lib/libcxx/include/__algorithm/fill_n.h +++ b/lib/libcxx/include/__algorithm/fill_n.h @@ -9,11 +9,14 @@ #ifndef _LIBCPP___ALGORITHM_FILL_N_H #define _LIBCPP___ALGORITHM_FILL_N_H -#include <__algorithm/min.h> +#include <__algorithm/for_each_n_segment.h> +#include <__algorithm/specialized_algorithms.h> #include <__config> -#include <__fwd/bit_reference.h> -#include <__memory/pointer_traits.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/segmented_iterator.h> +#include <__type_traits/enable_if.h> #include <__utility/convert_to_integral.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -26,56 +29,39 @@ _LIBCPP_BEGIN_NAMESPACE_STD // fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset. -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator -__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value); - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void -__fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_type_traits<_Cp>::size_type __n) { - using _It = __bit_iterator<_Cp, false>; - using __storage_type = typename _It::__storage_type; - - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = std::min(__clz_f, __n); - std::__fill_masked_range(std::__to_address(__first.__seg_), __clz_f - __dn, __first.__ctz_, _FillVal); - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - __storage_type __nw = __n / __bits_per_word; - std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? 
static_cast<__storage_type>(-1) : 0); - __n -= __nw * __bits_per_word; - // do last partial word - if (__n > 0) { - __first.__seg_ += __nw; - std::__fill_masked_range(std::__to_address(__first.__seg_), __bits_per_word - __n, 0u, _FillVal); - } -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> -__fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) { - if (__n > 0) { - if (__value) - std::__fill_n_bool(__first, __n); - else - std::__fill_n_bool(__first, __n); - } - return __first + __n; -} - -template +template < + class _OutputIterator, + class _Size, + class _Tp, + __enable_if_t >::__has_algorithm, + int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) { +#ifndef _LIBCPP_CXX03_LANG + if constexpr (__is_segmented_iterator_v<_OutputIterator>) { + using __local_iterator = typename __segmented_iterator_traits<_OutputIterator>::__local_iterator; + if constexpr (__has_random_access_iterator_category<__local_iterator>::value) { + return std::__for_each_n_segment(__first, __n, [&](__local_iterator __lfirst, __local_iterator __llast) { + std::__fill_n(__lfirst, __llast - __lfirst, __value); + }); + } + } +#endif for (; __n > 0; ++__first, (void)--__n) *__first = __value; return __first; } +template >::__has_algorithm, + int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutIter __fill_n(_OutIter __first, _Size __n, const _Tp& __value) { + return __specialized_algorithm<_Algorithm::__fill_n, __single_iterator<_OutIter> >()( + std::move(__first), __n, __value); +} + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) { diff --git a/lib/libcxx/include/__algorithm/find.h b/lib/libcxx/include/__algorithm/find.h index a7d9374b3a..852bc2da3e 100644 --- a/lib/libcxx/include/__algorithm/find.h +++ 
b/lib/libcxx/include/__algorithm/find.h @@ -12,16 +12,19 @@ #include <__algorithm/find_segment_if.h> #include <__algorithm/min.h> +#include <__algorithm/simd_utils.h> #include <__algorithm/unwrap_iter.h> #include <__bit/countr.h> #include <__bit/invert_if.h> #include <__config> +#include <__cstddef/size_t.h> #include <__functional/identity.h> #include <__fwd/bit_reference.h> #include <__iterator/segmented_iterator.h> #include <__string/constexpr_c_functions.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> +#include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_signed.h> @@ -44,46 +47,108 @@ _LIBCPP_BEGIN_NAMESPACE_STD // generic implementation template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter -__find(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) { +__find_loop(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) { for (; __first != __last; ++__first) if (std::__invoke(__proj, *__first) == __value) break; return __first; } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter +__find(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) { + return std::__find_loop(std::move(__first), std::move(__last), __value, __proj); +} + +#if _LIBCPP_VECTORIZE_ALGORITHMS +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find_vectorized(_Tp* __first, _Tp* __last, _Up __value) { + if (!__libcpp_is_constant_evaluated()) { + constexpr size_t __unroll_count = 4; + constexpr size_t __vec_size = __native_vector_size<_Tp>; + using __vec = __simd_vector<_Tp, __vec_size>; + + auto __orig_first = __first; + + auto __values = static_cast<__simd_vector<_Tp, __vec_size>>(__value); // broadcast the value + while (static_cast(__last - __first) >= __unroll_count * __vec_size) [[__unlikely__]] { + __vec __lhs[__unroll_count]; + + for (size_t __i = 0; __i 
!= __unroll_count; ++__i) + __lhs[__i] = std::__load_vector<__vec>(__first + __i * __vec_size); + + for (size_t __i = 0; __i != __unroll_count; ++__i) { + if (auto __cmp_res = __lhs[__i] == __values; std::__any_of(__cmp_res)) { + auto __offset = __i * __vec_size + std::__find_first_set(__cmp_res); + return __first + __offset; + } + } + + __first += __unroll_count * __vec_size; + } + + // check the remaining 0-3 vectors + while (static_cast(__last - __first) >= __vec_size) { + if (auto __cmp_res = std::__load_vector<__vec>(__first) == __values; std::__any_of(__cmp_res)) { + return __first + std::__find_first_set(__cmp_res); + } + __first += __vec_size; + } + + if (__last - __first == 0) + return __first; + + // Check if we can load elements in front of the current pointer. If that's the case load a vector at + // (last - vector_size) to check the remaining elements + if (static_cast(__first - __orig_first) >= __vec_size) { + __first = __last - __vec_size; + return __first + std::__find_first_set(std::__load_vector<__vec>(__first) == __values); + } + } + + __identity __proj; + return std::__find_loop(__first, __last, __value, __proj); +} +#endif + +#ifndef _LIBCPP_CXX03_LANG // trivially equality comparable implementations template ::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value && - sizeof(_Tp) == 1, - int> = 0> + __enable_if_t<__is_identity<_Proj>::value && __is_trivially_equality_comparable_v<_Tp, _Up>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) { - if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first)) - return __ret; - return __last; + if constexpr (sizeof(_Tp) == 1) { + if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first)) + return __ret; + return __last; + } +# if _LIBCPP_HAS_WIDE_CHARACTERS + else if constexpr (sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t)) { + if (auto 
__ret = std::__constexpr_wmemchr(__first, __value, __last - __first)) + return __ret; + return __last; + } +# endif +# if _LIBCPP_VECTORIZE_ALGORITHMS + else if constexpr (is_integral<_Tp>::value) { + return std::__find_vectorized(__first, __last, __value); + } +# endif + else { + __identity __proj; + return std::__find_loop(__first, __last, __value, __proj); + } } - -#if _LIBCPP_HAS_WIDE_CHARACTERS -template ::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value && - sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t), - int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) { - if (auto __ret = std::__constexpr_wmemchr(__first, __value, __last - __first)) - return __ret; - return __last; -} -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // TODO: This should also be possible to get right with different signedness // cast integral types to allow vectorization template ::value && !__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value && + __enable_if_t<__is_identity<_Proj>::value && !__is_trivially_equality_comparable_v<_Tp, _Up> && is_integral<_Tp>::value && is_integral<_Up>::value && is_signed<_Tp>::value == is_signed<_Up>::value, int> = 0> @@ -143,31 +208,23 @@ __find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __la // segmented iterator implementation -template -struct __find_segment; - template ::value, int> = 0> + __enable_if_t<__is_segmented_iterator_v<_SegmentedIterator>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator __find(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) { - return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj); + using __local_iterator = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; + return std::__find_segment_if( + 
std::move(__first), + std::move(__last), + [&__value](__local_iterator __lfirst, __local_iterator __llast, _Proj& __lproj) { + return std::__rewrap_iter( + __lfirst, std::__find(std::__unwrap_iter(__lfirst), std::__unwrap_iter(__llast), __value, __lproj)); + }, + __proj); } -template -struct __find_segment { - const _Tp& __value_; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __find_segment(const _Tp& __value) : __value_(__value) {} - - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator - operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const { - return std::__find(__first, __last, __value_, __proj); - } -}; - // public API template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator diff --git a/lib/libcxx/include/__algorithm/find_end.h b/lib/libcxx/include/__algorithm/find_end.h index 86b4a3e2e3..84b43e31a3 100644 --- a/lib/libcxx/include/__algorithm/find_end.h +++ b/lib/libcxx/include/__algorithm/find_end.h @@ -76,6 +76,111 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> } } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter1> __find_end_impl( + _Iter1 __first1, + _Sent1 __sent1, + _Iter2 __first2, + _Sent2 __sent2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + bidirectional_iterator_tag, + bidirectional_iterator_tag) { + auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1); + auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2); + // modeled after search algorithm (in reverse) + if (__first2 == __last2) + return std::make_pair(__last1, __last1); // Everything matches an empty sequence + _Iter1 __l1 = __last1; + _Iter2 __l2 = __last2; + --__l2; + while (true) { + // Find last element in sequence 1 that matches *(__last2-1), with a mininum of loop checks + while (true) { + if (__first1 == __l1) // return __last1 if no element matches *__first2 + return std::make_pair(__last1, __last1); + if 
(std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) + break; + } + // *__l1 matches *__l2, now match elements before here + _Iter1 __match_last = __l1; + _Iter1 __m1 = __l1; + _Iter2 __m2 = __l2; + while (true) { + if (__m2 == __first2) // If pattern exhausted, __m1 is the answer (works for 1 element pattern) + return std::make_pair(__m1, ++__match_last); + if (__m1 == __first1) // Otherwise if source exhaused, pattern not found + return std::make_pair(__last1, __last1); + + // if there is a mismatch, restart with a new __l1 + if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { + break; + } // else there is a match, check next elements + } + } +} + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> __find_end_impl( + _Iter1 __first1, + _Sent1 __sent1, + _Iter2 __first2, + _Sent2 __sent2, + _Pred& __pred, + _Proj1& __proj1, + _Proj2& __proj2, + random_access_iterator_tag, + random_access_iterator_tag) { + typedef typename iterator_traits<_Iter1>::difference_type _D1; + auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1); + auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2); + // Take advantage of knowing source and pattern lengths. 
Stop short when source is smaller than pattern + auto __len2 = __last2 - __first2; + if (__len2 == 0) + return std::make_pair(__last1, __last1); + auto __len1 = __last1 - __first1; + if (__len1 < __len2) + return std::make_pair(__last1, __last1); + const _Iter1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here + _Iter1 __l1 = __last1; + _Iter2 __l2 = __last2; + --__l2; + while (true) { + while (true) { + if (__s == __l1) + return std::make_pair(__last1, __last1); + if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) + break; + } + _Iter1 __last_match = __l1; + _Iter1 __m1 = __l1; + _Iter2 __m2 = __l2; + while (true) { + if (__m2 == __first2) + return std::make_pair(__m1, ++__last_match); + // no need to check range on __m1 because __s guarantees we have enough source + if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { + break; + } + } + } +} + template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic( _ForwardIterator1 __first1, diff --git a/lib/libcxx/include/__algorithm/for_each.h b/lib/libcxx/include/__algorithm/for_each.h index 4167eec350..85fedce3d9 100644 --- a/lib/libcxx/include/__algorithm/for_each.h +++ b/lib/libcxx/include/__algorithm/for_each.h @@ -11,45 +11,41 @@ #define _LIBCPP___ALGORITHM_FOR_EACH_H #include <__algorithm/for_each_segment.h> +#include <__algorithm/specialized_algorithms.h> #include <__config> #include <__functional/identity.h> #include <__iterator/segmented_iterator.h> -#include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> -#include <__utility/move.h> +#include <__type_traits/is_same.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator 
-__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) { +__for_each(_InputIterator __first, _Sent __last, _Func& __func, _Proj& __proj) { +#ifndef _LIBCPP_CXX03_LANG + if constexpr (using _SpecialAlg = + __specialized_algorithm<_Algorithm::__for_each, __iterator_pair<_InputIterator, _Sent>>; + _SpecialAlg::__has_algorithm) { + _SpecialAlg()(__first, __last, __func, __proj); + return __last; + } else if constexpr (is_same<_InputIterator, _Sent>::value && __is_segmented_iterator_v<_InputIterator>) { + using __local_iterator_t = typename __segmented_iterator_traits<_InputIterator>::__local_iterator; + std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { + std::__for_each(__lfirst, __llast, __func, __proj); + }); + return __last; + } +#endif for (; __first != __last; ++__first) - std::__invoke(__f, std::__invoke(__proj, *__first)); + std::__invoke(__func, std::__invoke(__proj, *__first)); return __first; } -#ifndef _LIBCPP_CXX03_LANG -template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator -__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) { - using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; - std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __func, __proj); - }); - return __last; -} -#endif // !_LIBCPP_CXX03_LANG - template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func for_each(_InputIterator __first, _InputIterator __last, _Func __f) { @@ -60,6 +56,4 @@ for_each(_InputIterator __first, _InputIterator __last, _Func __f) { _LIBCPP_END_NAMESPACE_STD -_LIBCPP_POP_MACROS - #endif // _LIBCPP___ALGORITHM_FOR_EACH_H diff --git a/lib/libcxx/include/__algorithm/for_each_n.h b/lib/libcxx/include/__algorithm/for_each_n.h index 9a6c6bb517..72c7adb093 100644 --- 
a/lib/libcxx/include/__algorithm/for_each_n.h +++ b/lib/libcxx/include/__algorithm/for_each_n.h @@ -16,10 +16,7 @@ #include <__functional/identity.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> -#include <__type_traits/disjunction.h> -#include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> -#include <__type_traits/negation.h> #include <__utility/convert_to_integral.h> #include <__utility/move.h> @@ -32,57 +29,33 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template ::value && - _Or< _Not<__is_segmented_iterator<_InputIterator> >, - _Not<__has_random_access_local_iterator<_InputIterator> > >::value, - int> = 0> +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; _IntegralSize __n = __orig_n; - while (__n > 0) { - std::__invoke(__f, std::__invoke(__proj, *__first)); - ++__first; - --__n; - } - return std::move(__first); -} - -template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter -__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) { - typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n; - auto __last = __first + __n; - std::__for_each(__first, __last, __f, __proj); - return __last; -} #ifndef _LIBCPP_CXX03_LANG -template ::value && - __is_segmented_iterator<_SegmentedIterator>::value && - __has_random_access_iterator_category< - typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, - int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator -__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { - using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; - return std::__for_each_n_segment(__first, __orig_n, 
[&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __f, __proj); - }); + if constexpr (__is_segmented_iterator_v<_InputIterator>) { + using __local_iterator = typename __segmented_iterator_traits<_InputIterator>::__local_iterator; + if constexpr (__has_random_access_iterator_category<__local_iterator>::value) { + return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator __lfirst, __local_iterator __llast) { + std::__for_each(__lfirst, __llast, __f, __proj); + }); + } else { + return std::__for_each(__first, __first + __n, __f, __proj); + } + } else +#endif + { + while (__n > 0) { + std::__invoke(__f, std::__invoke(__proj, *__first)); + ++__first; + --__n; + } + return std::move(__first); + } } -#endif // !_LIBCPP_CXX03_LANG #if _LIBCPP_STD_VER >= 17 diff --git a/lib/libcxx/include/__algorithm/for_each_n_segment.h b/lib/libcxx/include/__algorithm/for_each_n_segment.h index 1b522fb373..a433df5d09 100644 --- a/lib/libcxx/include/__algorithm/for_each_n_segment.h +++ b/lib/libcxx/include/__algorithm/for_each_n_segment.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator __for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) { - static_assert(__is_segmented_iterator<_SegmentedIterator>::value && + static_assert(__is_segmented_iterator_v<_SegmentedIterator> && __has_random_access_iterator_category< typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, "__for_each_n_segment only works with segmented iterators with random-access local iterators"); diff --git a/lib/libcxx/include/__algorithm/for_each_segment.h b/lib/libcxx/include/__algorithm/for_each_segment.h index 93aa8259b2..c02436c9aa 100644 --- a/lib/libcxx/include/__algorithm/for_each_segment.h +++ b/lib/libcxx/include/__algorithm/for_each_segment.h @@ -48,6 +48,32 @@ __for_each_segment(_SegmentedIterator __first, 
_SegmentedIterator __last, _Funct __func(_Traits::__begin(__sfirst), _Traits::__local(__last)); } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void +__for_each_segment_backward(_SegmentedIterator __first, _SegmentedIterator __last, _Functor __func) { + using _Traits = __segmented_iterator_traits<_SegmentedIterator>; + + auto __sfirst = _Traits::__segment(__first); + auto __slast = _Traits::__segment(__last); + + // We are in a single segment, so we might not be at the beginning or end + if (__sfirst == __slast) { + __func(_Traits::__local(__first), _Traits::__local(__last)); + return; + } + + // We have more than one segment. Iterate over the last segment, since we might not start at the end + __func(_Traits::__begin(__slast), _Traits::__local(__last)); + --__slast; + // iterate over the segments which are guaranteed to be completely in the range + while (__sfirst != __slast) { + __func(_Traits::__begin(__slast), _Traits::__end(__slast)); + --__slast; + } + // iterate over the first segment + __func(_Traits::__local(__first), _Traits::__end(__slast)); +} + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_FOR_EACH_SEGMENT_H diff --git a/lib/libcxx/include/__algorithm/generate.h b/lib/libcxx/include/__algorithm/generate.h index c95b527402..c4cd75cd0a 100644 --- a/lib/libcxx/include/__algorithm/generate.h +++ b/lib/libcxx/include/__algorithm/generate.h @@ -9,7 +9,9 @@ #ifndef _LIBCPP___ALGORITHM_GENERATE_H #define _LIBCPP___ALGORITHM_GENERATE_H +#include <__algorithm/for_each.h> #include <__config> +#include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,8 +22,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void generate(_ForwardIterator __first, _ForwardIterator __last, _Generator __gen) { - for (; __first != __last; ++__first) - *__first = __gen(); + using __iter_ref = decltype(*__first); + std::for_each(__first, __last, 
[&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); }); } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/generate_n.h b/lib/libcxx/include/__algorithm/generate_n.h index f36403fd0f..23899e49e0 100644 --- a/lib/libcxx/include/__algorithm/generate_n.h +++ b/lib/libcxx/include/__algorithm/generate_n.h @@ -9,25 +9,38 @@ #ifndef _LIBCPP___ALGORITHM_GENERATE_N_H #define _LIBCPP___ALGORITHM_GENERATE_N_H +#include <__algorithm/for_each_n.h> #include <__config> -#include <__utility/convert_to_integral.h> +#include <__functional/identity.h> +#include <__utility/forward.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +__generate_n(_OutputIterator __first, _Size __orig_n, _Generator& __gen) { + using __iter_ref = decltype(*__first); + __identity __proj; + auto __f = [&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); }; + return std::__for_each_n(std::move(__first), __orig_n, __f, __proj); +} + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen) { - typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; - _IntegralSize __n = __orig_n; - for (; __n > 0; ++__first, (void)--__n) - *__first = __gen(); - return __first; + return std::__generate_n(std::move(__first), __orig_n, __gen); } _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_GENERATE_N_H diff --git a/lib/libcxx/include/__algorithm/is_permutation.h b/lib/libcxx/include/__algorithm/is_permutation.h index 1afb11596b..86f469c279 100644 --- a/lib/libcxx/include/__algorithm/is_permutation.h +++ b/lib/libcxx/include/__algorithm/is_permutation.h @@ -78,7 +78,7 @@ _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl( _Pred&& __pred, _Proj1&& __proj1, _Proj2&& __proj2) { - using _D1 = __iter_diff_t<_Iter1>; + using _D1 = __iterator_difference_type<_Iter1>; for (auto __i = __first1; __i != __last1; ++__i) { // Have we already counted the number of *__i in [f1, l1)? @@ -126,7 +126,7 @@ template ; + using _D1 = __iterator_difference_type<_ForwardIterator1>; _D1 __l1 = _IterOps<_AlgPolicy>::distance(__first1, __last1); if (__l1 == _D1(1)) return false; @@ -173,10 +173,10 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( if (__first2 == __last2) // Second range is shorter return false; - using _D1 = __iter_diff_t<_Iter1>; + using _D1 = __iterator_difference_type<_Iter1>; _D1 __l1 = _IterOps<_AlgPolicy>::distance(__first1, __last1); - using _D2 = __iter_diff_t<_Iter2>; + using _D2 = __iterator_difference_type<_Iter2>; _D2 __l2 = _IterOps<_AlgPolicy>::distance(__first2, __last2); if (__l1 != __l2) return false; diff --git a/lib/libcxx/include/__algorithm/iterator_operations.h b/lib/libcxx/include/__algorithm/iterator_operations.h index e5c89c1e67..1aa2f8d160 100644 --- a/lib/libcxx/include/__algorithm/iterator_operations.h +++ b/lib/libcxx/include/__algorithm/iterator_operations.h @@ -219,6 +219,9 @@ private: template using __policy_iter_diff_t _LIBCPP_NODEBUG = typename _IterOps<_AlgPolicy>::template __difference_type<_Iter>; +template +using __policy_value_type _LIBCPP_NODEBUG = typename _IterOps<_AlgPolicy>::template __value_type<_Iter>; + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__algorithm/lexicographical_compare.h b/lib/libcxx/include/__algorithm/lexicographical_compare.h index ebe7e3b56a..a12add69d4 100644 --- a/lib/libcxx/include/__algorithm/lexicographical_compare.h +++ b/lib/libcxx/include/__algorithm/lexicographical_compare.h @@ -66,8 +66,8 @@ template && !is_volatile<_Tp>::value && - __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value && - 
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value, + __is_trivially_equality_comparable_v<_Tp, _Tp> && __is_identity<_Proj1>::value && + __is_identity<_Proj2>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Comp&, _Proj1&, _Proj2&) { diff --git a/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h b/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h index a5872e90cf..442223e79e 100644 --- a/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h +++ b/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h @@ -37,13 +37,13 @@ template _LIBCPP_HIDE_FROM_ABI constexpr auto __lexicographical_compare_three_way_fast_path( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _Cmp& __comp) -> decltype(__comp(*__first1, *__first2)) { - static_assert( - signed_integral<__iter_diff_t<_InputIterator1>>, "Using a non-integral difference_type is undefined behavior."); - static_assert( - signed_integral<__iter_diff_t<_InputIterator2>>, "Using a non-integral difference_type is undefined behavior."); + static_assert(signed_integral<__iterator_difference_type<_InputIterator1>>, + "Using a non-integral difference_type is undefined behavior."); + static_assert(signed_integral<__iterator_difference_type<_InputIterator2>>, + "Using a non-integral difference_type is undefined behavior."); - using _Len1 = __iter_diff_t<_InputIterator1>; - using _Len2 = __iter_diff_t<_InputIterator2>; + using _Len1 = __iterator_difference_type<_InputIterator1>; + using _Len2 = __iterator_difference_type<_InputIterator2>; using _Common = common_type_t<_Len1, _Len2>; _Len1 __len1 = __last1 - __first1; diff --git a/lib/libcxx/include/__algorithm/make_heap.h b/lib/libcxx/include/__algorithm/make_heap.h index e8f0cdb273..f98a0d2f89 100644 --- a/lib/libcxx/include/__algorithm/make_heap.h 
+++ b/lib/libcxx/include/__algorithm/make_heap.h @@ -12,9 +12,11 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/push_heap.h> #include <__algorithm/sift_down.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_arithmetic.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -31,13 +33,23 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) { __comp_ref_type<_Compare> __comp_ref = __comp; - using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; - difference_type __n = __last - __first; + using __diff_t = __iterator_difference_type<_RandomAccessIterator>; + const __diff_t __n = __last - __first; + + const bool __assume_both_children = is_arithmetic<__iterator_value_type<_RandomAccessIterator> >::value; + + // While it would be correct to always assume we have both children, in practice we observed this to be a performance + // improvement only for arithmetic types. + const __diff_t __sift_down_n = __assume_both_children ? ((__n & 1) ? 
__n : __n - 1) : __n; + if (__n > 1) { // start from the first parent, there is no need to consider children - for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) { - std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start); + + for (__diff_t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) { + std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start); } + if _LIBCPP_CONSTEXPR (__assume_both_children) + std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n); } } diff --git a/lib/libcxx/include/__algorithm/mismatch.h b/lib/libcxx/include/__algorithm/mismatch.h index a6836792c0..7111cd9398 100644 --- a/lib/libcxx/include/__algorithm/mismatch.h +++ b/lib/libcxx/include/__algorithm/mismatch.h @@ -60,7 +60,7 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter> __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) { - using __value_type = __iter_value_type<_Iter>; + using __value_type = __iterator_value_type<_Iter>; constexpr size_t __unroll_count = 4; constexpr size_t __vec_size = __native_vector_size<__value_type>; using __vec = __simd_vector<__value_type, __vec_size>; @@ -136,7 +136,7 @@ template ::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value && - __can_map_to_integer_v<_Tp> && __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value, + __can_map_to_integer_v<_Tp> && __is_trivially_equality_comparable_v<_Tp, _Tp>, int> = 0> [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { diff --git a/lib/libcxx/include/__algorithm/move.h b/lib/libcxx/include/__algorithm/move.h index a3320e9f19..ddadfa778f 100644 --- 
a/lib/libcxx/include/__algorithm/move.h +++ b/lib/libcxx/include/__algorithm/move.h @@ -50,37 +50,26 @@ struct __move_impl { return std::make_pair(std::move(__first), std::move(__result)); } - template - struct _MoveSegment { - using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InIter>; - - _OutIter& __result_; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit _MoveSegment(_OutIter& __result) - : __result_(__result) {} - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void - operator()(typename _Traits::__local_iterator __lfirst, typename _Traits::__local_iterator __llast) { - __result_ = std::__move<_AlgPolicy>(__lfirst, __llast, std::move(__result_)).second; - } - }; - - template ::value, int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { - std::__for_each_segment(__first, __last, _MoveSegment<_InIter, _OutIter>(__result)); + using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator; + std::__for_each_segment(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) { + __result = std::__move<_AlgPolicy>(__lfirst, __llast, std::move(__result)).second; + }); return std::make_pair(__last, std::move(__result)); } template ::value && - !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value, + !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { using _Traits = __segmented_iterator_traits<_OutIter>; - using _DiffT = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type; + using _DiffT = + typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type; if (__first == __last) return 
std::make_pair(std::move(__first), std::move(__result)); diff --git a/lib/libcxx/include/__algorithm/move_backward.h b/lib/libcxx/include/__algorithm/move_backward.h index 14482fee18..43b72057a5 100644 --- a/lib/libcxx/include/__algorithm/move_backward.h +++ b/lib/libcxx/include/__algorithm/move_backward.h @@ -11,6 +11,7 @@ #include <__algorithm/copy_backward.h> #include <__algorithm/copy_move_common.h> +#include <__algorithm/for_each_segment.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> @@ -51,42 +52,26 @@ struct __move_backward_impl { return std::make_pair(std::move(__original_last_iter), std::move(__result)); } - template ::value, int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { - using _Traits = __segmented_iterator_traits<_InIter>; - auto __sfirst = _Traits::__segment(__first); - auto __slast = _Traits::__segment(__last); - if (__sfirst == __slast) { - auto __iters = - std::__move_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__local(__last), std::move(__result)); - return std::make_pair(__last, __iters.second); - } - - __result = - std::__move_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__local(__last), std::move(__result)) - .second; - --__slast; - while (__sfirst != __slast) { - __result = - std::__move_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__end(__slast), std::move(__result)) - .second; - --__slast; - } - __result = std::__move_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__end(__slast), std::move(__result)) - .second; + using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator; + std::__for_each_segment_backward(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) { + __result = std::__move_backward<_AlgPolicy>(std::move(__lfirst), std::move(__llast), 
std::move(__result)).second; + }); return std::make_pair(__last, std::move(__result)); } template ::value && - !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value, + !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> operator()(_InIter __first, _InIter __last, _OutIter __result) const { using _Traits = __segmented_iterator_traits<_OutIter>; - using _DiffT = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type; + using _DiffT = + typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type; // When the range contains no elements, __result might not be a valid iterator if (__first == __last) diff --git a/lib/libcxx/include/__algorithm/none_of.h b/lib/libcxx/include/__algorithm/none_of.h index e6bd197622..1e1c8d1aad 100644 --- a/lib/libcxx/include/__algorithm/none_of.h +++ b/lib/libcxx/include/__algorithm/none_of.h @@ -10,7 +10,9 @@ #ifndef _LIBCPP___ALGORITHM_NONE_OF_H #define _LIBCPP___ALGORITHM_NONE_OF_H +#include <__algorithm/any_of.h> #include <__config> +#include <__functional/identity.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,10 +23,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool none_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { - for (; __first != __last; ++__first) - if (__pred(*__first)) - return false; - return true; + __identity __proj; + return !std::__any_of(__first, __last, __pred, __proj); } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/partial_sort.h b/lib/libcxx/include/__algorithm/partial_sort.h index 7f8d0c4914..4b39ae0cf2 100644 --- a/lib/libcxx/include/__algorithm/partial_sort.h +++ b/lib/libcxx/include/__algorithm/partial_sort.h @@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part for (; __i != __last; ++__i) { if (__comp(*__i, *__first)) { _IterOps<_AlgPolicy>::iter_swap(__i, __first); - std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first); + std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0); } } std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp); diff --git a/lib/libcxx/include/__algorithm/partial_sort_copy.h b/lib/libcxx/include/__algorithm/partial_sort_copy.h index 172f53b290..2230dfc9cc 100644 --- a/lib/libcxx/include/__algorithm/partial_sort_copy.h +++ b/lib/libcxx/include/__algorithm/partial_sort_copy.h @@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random for (; __first != __last; ++__first) if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) { *__result_first = *__first; - std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first); + std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0); } std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp); } diff --git a/lib/libcxx/include/__algorithm/pstl.h b/lib/libcxx/include/__algorithm/pstl.h index aa7b49de93..7169dd85df 100644 --- a/lib/libcxx/include/__algorithm/pstl.h +++ b/lib/libcxx/include/__algorithm/pstl.h @@ -115,7 +115,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI __iter_diff_t<_ForwardIterator> +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI __iterator_difference_type<_ForwardIterator> count_if(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR( _ForwardIterator, "count_if(first, last, pred) requires [first, last) to be ForwardIterators"); @@ -129,7 +129,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI __iter_diff_t<_ForwardIterator> +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
__iterator_difference_type<_ForwardIterator> count(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR( _ForwardIterator, "count(first, last, val) requires [first, last) to be ForwardIterators"); @@ -144,7 +144,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI bool +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool equal(_ExecutionPolicy&& __policy, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -166,7 +166,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI bool +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool equal(_ExecutionPolicy&& __policy, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator1, "equal requires ForwardIterators"); _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator2, "equal requires ForwardIterators"); @@ -185,7 +185,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI bool +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool equal(_ExecutionPolicy&& __policy, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -209,7 +209,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI bool +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool equal(_ExecutionPolicy&& __policy, _ForwardIterator1 __first1, _ForwardIterator1 __last1, @@ -259,7 +259,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI _ForwardIterator +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _ForwardIterator find_if(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "find_if requires ForwardIterators"); using _Implementation = __pstl::__dispatch<__pstl::__find_if, __pstl::__current_configuration, _RawPolicy>; @@ -272,7 +272,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI _ForwardIterator +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _ForwardIterator find_if_not(_ExecutionPolicy&& 
__policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "find_if_not requires ForwardIterators"); using _Implementation = __pstl::__dispatch<__pstl::__find_if_not, __pstl::__current_configuration, _RawPolicy>; @@ -285,7 +285,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI _ForwardIterator +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _ForwardIterator find(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "find requires ForwardIterators"); using _Implementation = __pstl::__dispatch<__pstl::__find, __pstl::__current_configuration, _RawPolicy>; diff --git a/lib/libcxx/include/__algorithm/radix_sort.h b/lib/libcxx/include/__algorithm/radix_sort.h index 055d8a0765..5549a69f5e 100644 --- a/lib/libcxx/include/__algorithm/radix_sort.h +++ b/lib/libcxx/include/__algorithm/radix_sort.h @@ -72,14 +72,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 14 template -_LIBCPP_HIDE_FROM_ABI constexpr pair<_OutputIterator, __iter_value_type<_InputIterator>> +_LIBCPP_HIDE_FROM_ABI constexpr pair<_OutputIterator, __iterator_value_type<_InputIterator>> __partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { if (__first == __last) return {__result, 0}; - auto __max = *__first; - __iter_value_type<_InputIterator> __sum = *__first; - *__result = __sum; + auto __max = *__first; + __iterator_value_type<_InputIterator> __sum = *__first; + *__result = __sum; while (++__first != __last) { if (__max < *__first) { @@ -124,7 +124,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __nth_radix(size_t __radix_number, _Radix _ template _LIBCPP_HIDE_FROM_ABI constexpr void __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) { - using __value_type = __iter_value_type<_ForwardIterator>; + using __value_type = 
__iterator_value_type<_ForwardIterator>; using __traits = __counting_sort_traits<__value_type, _Map>; std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; }); @@ -160,7 +160,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __collect_impl( _RandomAccessIterator1 __counters, _RandomAccessIterator2 __maximums, index_sequence<_Radices...>) { - using __value_type = __iter_value_type<_ForwardIterator>; + using __value_type = __iterator_value_type<_ForwardIterator>; constexpr auto __radix_value_range = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_value_range; auto __previous = numeric_limits<__invoke_result_t<_Map, __value_type>>::min(); @@ -189,7 +189,7 @@ __collect(_ForwardIterator __first, _Radix __radix, _RandomAccessIterator1 __counters, _RandomAccessIterator2 __maximums) { - using __value_type = __iter_value_type<_ForwardIterator>; + using __value_type = __iterator_value_type<_ForwardIterator>; constexpr auto __radix_count = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_count; return std::__collect_impl( __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>()); @@ -213,10 +213,10 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __dispose_backward( template _LIBCPP_HIDE_FROM_ABI constexpr _RandomAccessIterator __counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) { - using __value_type = __iter_value_type<_ForwardIterator>; + using __value_type = __iterator_value_type<_ForwardIterator>; using __traits = __counting_sort_traits<__value_type, _Map>; - __iter_diff_t<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0}; + __iterator_difference_type<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0}; std::__collect(__first, __last, __map, std::next(std::begin(__counters))); std::__dispose(__first, __last, __result, __map, std::begin(__counters)); @@ -224,12 +224,13 @@ 
__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomA return __result + __counters[__traits::__value_range]; } -template , _Map, _Radix>::__radix_count == 1, - int> = 0> +template < + class _RandomAccessIterator1, + class _RandomAccessIterator2, + class _Map, + class _Radix, + enable_if_t<__radix_sort_traits<__iterator_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count == 1, + int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl( _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, @@ -243,24 +244,25 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl( std::move(__buffer, __buffer_end, __first); } -template < - class _RandomAccessIterator1, - class _RandomAccessIterator2, - class _Map, - class _Radix, - enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0, - int> = 0 > +template , _Map, _Radix>::__radix_count % 2 == 0, + int> = 0> _LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl( _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer_begin, _Map __map, _Radix __radix) { - using __value_type = __iter_value_type<_RandomAccessIterator1>; + using __value_type = __iterator_value_type<_RandomAccessIterator1>; using __traits = __radix_sort_traits<__value_type, _Map, _Radix>; - __iter_diff_t<_RandomAccessIterator1> __counters[__traits::__radix_count][__traits::__radix_value_range] = {{0}}; - __iter_diff_t<_RandomAccessIterator1> __maximums[__traits::__radix_count] = {0}; + __iterator_difference_type<_RandomAccessIterator1> + __counters[__traits::__radix_count][__traits::__radix_value_range] = {{0}}; + __iterator_difference_type<_RandomAccessIterator1> __maximums[__traits::__radix_count] = {0}; const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums); if (!__is_sorted) { const auto __range_size = std::distance(__first, __last); diff --git 
a/lib/libcxx/include/__algorithm/ranges_copy_n.h b/lib/libcxx/include/__algorithm/ranges_copy_n.h index 1fbc61674e..6bee4c3e7c 100644 --- a/lib/libcxx/include/__algorithm/ranges_copy_n.h +++ b/lib/libcxx/include/__algorithm/ranges_copy_n.h @@ -9,16 +9,12 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_COPY_N_H #define _LIBCPP___ALGORITHM_RANGES_COPY_N_H -#include <__algorithm/copy.h> +#include <__algorithm/copy_n.h> #include <__algorithm/in_out_result.h> #include <__algorithm/iterator_operations.h> -#include <__algorithm/ranges_copy.h> #include <__config> -#include <__functional/identity.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> -#include <__iterator/unreachable_sentinel.h> -#include <__iterator/wrap_iter.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -37,32 +33,13 @@ namespace ranges { template using copy_n_result = in_out_result<_Ip, _Op>; -// TODO: Merge this with copy_n struct __copy_n { - template - _LIBCPP_HIDE_FROM_ABI constexpr static copy_n_result<_InIter, _OutIter> - __go(_InIter __first, _DiffType __n, _OutIter __result) { - while (__n != 0) { - *__result = *__first; - ++__first; - ++__result; - --__n; - } - return {std::move(__first), std::move(__result)}; - } - - template - _LIBCPP_HIDE_FROM_ABI constexpr static copy_n_result<_InIter, _OutIter> - __go(_InIter __first, _DiffType __n, _OutIter __result) { - auto __ret = std::__copy(__first, __first + __n, __result); - return {__ret.first, __ret.second}; - } - template requires indirectly_copyable<_Ip, _Op> _LIBCPP_HIDE_FROM_ABI constexpr copy_n_result<_Ip, _Op> operator()(_Ip __first, iter_difference_t<_Ip> __n, _Op __result) const { - return __go(std::move(__first), __n, std::move(__result)); + auto __res = std::__copy_n<_RangeAlgPolicy>(std::move(__first), __n, std::move(__result)); + return {std::move(__res.first), std::move(__res.second)}; } }; diff --git a/lib/libcxx/include/__algorithm/ranges_equal.h 
b/lib/libcxx/include/__algorithm/ranges_equal.h index c26d13f002..8eb2fc1017 100644 --- a/lib/libcxx/include/__algorithm/ranges_equal.h +++ b/lib/libcxx/include/__algorithm/ranges_equal.h @@ -13,13 +13,12 @@ #include <__algorithm/unwrap_range.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__functional/ranges_operations.h> #include <__iterator/concepts.h> -#include <__iterator/distance.h> #include <__iterator/indirectly_comparable.h> #include <__ranges/access.h> #include <__ranges/concepts.h> +#include <__ranges/size.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -51,20 +50,17 @@ struct __equal { _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) { + static constexpr bool __both_sized = sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>; + if constexpr (__both_sized) { if (__last1 - __first1 != __last2 - __first2) return false; } - auto __unwrapped1 = std::__unwrap_range(std::move(__first1), std::move(__last1)); - auto __unwrapped2 = std::__unwrap_range(std::move(__first2), std::move(__last2)); - return std::__equal_impl( - std::move(__unwrapped1.first), - std::move(__unwrapped1.second), - std::move(__unwrapped2.first), - std::move(__unwrapped2.second), - __pred, - __proj1, - __proj2); + + auto [__ufirst1, __ulast1] = std::__unwrap_range(std::move(__first1), std::move(__last1)); + auto [__ufirst2, __ulast2] = std::__unwrap_range(std::move(__first2), std::move(__last2)); + + return std::__equal_impl<__both_sized>( + std::move(__ufirst1), std::move(__ulast1), std::move(__ufirst2), std::move(__ulast2), __pred, __proj1, __proj2); } template , iterator_t<_Range2>, _Pred, _Proj1, _Proj2> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const 
{ - if constexpr (sized_range<_Range1> && sized_range<_Range2>) { - if (ranges::distance(__range1) != ranges::distance(__range2)) + static constexpr bool __both_sized = sized_range<_Range1> && sized_range<_Range2>; + if constexpr (__both_sized) { + if (ranges::size(__range1) != ranges::size(__range2)) return false; } - auto __unwrapped1 = std::__unwrap_range(ranges::begin(__range1), ranges::end(__range1)); - auto __unwrapped2 = std::__unwrap_range(ranges::begin(__range2), ranges::end(__range2)); - return std::__equal_impl( - std::move(__unwrapped1.first), - std::move(__unwrapped1.second), - std::move(__unwrapped2.first), - std::move(__unwrapped2.second), - __pred, - __proj1, - __proj2); - return false; + + auto [__ufirst1, __ulast1] = std::__unwrap_range(ranges::begin(__range1), ranges::end(__range1)); + auto [__ufirst2, __ulast2] = std::__unwrap_range(ranges::begin(__range2), ranges::end(__range2)); + return std::__equal_impl<__both_sized>( + std::move(__ufirst1), std::move(__ulast1), std::move(__ufirst2), std::move(__ulast2), __pred, __proj1, __proj2); } }; diff --git a/lib/libcxx/include/__algorithm/ranges_fill.h b/lib/libcxx/include/__algorithm/ranges_fill.h index c248009f98..814ae6363f 100644 --- a/lib/libcxx/include/__algorithm/ranges_fill.h +++ b/lib/libcxx/include/__algorithm/ranges_fill.h @@ -9,12 +9,14 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FILL_H #define _LIBCPP___ALGORITHM_RANGES_FILL_H -#include <__algorithm/ranges_fill_n.h> +#include <__algorithm/fill.h> +#include <__algorithm/fill_n.h> #include <__config> #include <__iterator/concepts.h> #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/dangling.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -31,12 +33,11 @@ namespace ranges { struct __fill { template _Iter, sentinel_for<_Iter> _Sent> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const { - if constexpr 
(random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) { - return ranges::fill_n(__first, __last - __first, __value); + if constexpr (sized_sentinel_for<_Sent, _Iter>) { + auto __n = __last - __first; + return std::__fill_n(std::move(__first), __n, __value); } else { - for (; __first != __last; ++__first) - *__first = __value; - return __first; + return std::__fill(std::move(__first), std::move(__last), __value); } } diff --git a/lib/libcxx/include/__algorithm/ranges_for_each.h b/lib/libcxx/include/__algorithm/ranges_for_each.h index e9c84e8583..7a547fb269 100644 --- a/lib/libcxx/include/__algorithm/ranges_for_each.h +++ b/lib/libcxx/include/__algorithm/ranges_for_each.h @@ -12,6 +12,7 @@ #include <__algorithm/for_each.h> #include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> +#include <__algorithm/specialized_algorithms.h> #include <__concepts/assignable.h> #include <__config> #include <__functional/identity.h> @@ -20,6 +21,7 @@ #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/dangling.h> +#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -71,7 +73,13 @@ public: indirectly_unary_invocable, _Proj>> _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_result, _Func> operator()(_Range&& __range, _Func __func, _Proj __proj = {}) const { - return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj); + using _SpecialAlg = __specialized_algorithm<_Algorithm::__for_each, __single_range>>; + if constexpr (_SpecialAlg::__has_algorithm) { + auto [__iter, __func2] = _SpecialAlg()(__range, std::move(__func), std::move(__proj)); + return {std::move(__iter), std::move(__func)}; + } else { + return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj); + } } }; diff --git a/lib/libcxx/include/__algorithm/ranges_generate_n.h b/lib/libcxx/include/__algorithm/ranges_generate_n.h index a318994d0e..0cc9ce7b11 
100644 --- a/lib/libcxx/include/__algorithm/ranges_generate_n.h +++ b/lib/libcxx/include/__algorithm/ranges_generate_n.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H #define _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H +#include <__algorithm/generate_n.h> #include <__concepts/constructible.h> #include <__concepts/invocable.h> #include <__config> @@ -38,12 +39,7 @@ struct __generate_n { requires invocable<_Func&> && indirectly_writable<_OutIter, invoke_result_t<_Func&>> _LIBCPP_HIDE_FROM_ABI constexpr _OutIter operator()(_OutIter __first, iter_difference_t<_OutIter> __n, _Func __gen) const { - for (; __n > 0; --__n) { - *__first = __gen(); - ++__first; - } - - return __first; + return std::__generate_n(std::move(__first), __n, __gen); } }; diff --git a/lib/libcxx/include/__algorithm/ranges_search_n.h b/lib/libcxx/include/__algorithm/ranges_search_n.h index 81b568c096..746bfcc3d1 100644 --- a/lib/libcxx/include/__algorithm/ranges_search_n.h +++ b/lib/libcxx/include/__algorithm/ranges_search_n.h @@ -54,8 +54,8 @@ struct __search_n { } if constexpr (random_access_iterator<_Iter1>) { - auto __ret = std::__search_n_random_access_impl<_RangeAlgPolicy>( - __first, __last, __count, __value, __pred, __proj, __size); + auto __ret = + std::__search_n_random_access_impl<_RangeAlgPolicy>(__first, __count, __value, __pred, __proj, __size); return {std::move(__ret.first), std::move(__ret.second)}; } } diff --git a/lib/libcxx/include/__algorithm/rotate.h b/lib/libcxx/include/__algorithm/rotate.h index c676980f0c..b6d9eb3b2d 100644 --- a/lib/libcxx/include/__algorithm/rotate.h +++ b/lib/libcxx/include/__algorithm/rotate.h @@ -12,16 +12,13 @@ #include <__algorithm/copy.h> #include <__algorithm/copy_backward.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/min.h> #include <__algorithm/move.h> #include <__algorithm/move_backward.h> #include <__algorithm/swap_ranges.h> #include <__config> -#include <__cstddef/size_t.h> #include <__fwd/bit_reference.h> 
#include <__iterator/iterator_traits.h> -#include <__memory/construct_at.h> -#include <__memory/pointer_traits.h> -#include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_trivially_assignable.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -89,46 +86,32 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt return __r; } -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Integral __algo_gcd(_Integral __x, _Integral __y) { - do { - _Integral __t = __x % __y; - __x = __y; - __y = __t; - } while (__y); - return __x; -} +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Iter +__rotate_random_access(_Iter __first, _Iter __middle, _Sent __sent) { + auto __left = _IterOps<_AlgPolicy>::distance(__first, __middle); + auto __right = _IterOps<_AlgPolicy>::distance(__middle, __sent); + auto __last = __first + __right; -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _RandomAccessIterator -__rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) { - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; - using _Ops = _IterOps<_AlgPolicy>; + auto __min_len = std::min(__left, __right); - const difference_type __m1 = __middle - __first; - const difference_type __m2 = _Ops::distance(__middle, __last); - if (__m1 == __m2) { - std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle, __last); - return __middle; + while (__min_len > 0) { + if (__left <= __right) { + do { + std::__swap_ranges<_AlgPolicy>(__first, __first + __left, __first + __left); + __first += __left; + __right -= __left; + } while (__left <= __right); + __min_len = __right; + } else { + do { + std::__swap_ranges<_AlgPolicy>(__first + (__left - __right), __first + __left, __first + __left); + __left -= __right; + } while (__left > __right); + 
__min_len = __left; + } } - const difference_type __g = std::__algo_gcd(__m1, __m2); - for (_RandomAccessIterator __p = __first + __g; __p != __first;) { - value_type __t(_Ops::__iter_move(--__p)); - _RandomAccessIterator __p1 = __p; - _RandomAccessIterator __p2 = __p1 + __m1; - do { - *__p1 = _Ops::__iter_move(__p2); - __p1 = __p2; - const difference_type __d = _Ops::distance(__p2, __last); - if (__m1 < __d) - __p2 += __m1; - else - __p2 = __first + (__m1 - __d); - } while (__p2 != __p); - *__p1 = std::move(__t); - } - return __first + __m2; + return __last; } template @@ -170,7 +153,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator return std::__rotate_left<_AlgPolicy>(__first, __last); if (_IterOps<_AlgPolicy>::next(__middle) == __last) return std::__rotate_right<_AlgPolicy>(__first, __last); - return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last); + return std::__rotate_random_access<_AlgPolicy>(__first, __middle, __last); } return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last); } diff --git a/lib/libcxx/include/__algorithm/search_n.h b/lib/libcxx/include/__algorithm/search_n.h index 38474e1b23..0962542e13 100644 --- a/lib/libcxx/include/__algorithm/search_n.h +++ b/lib/libcxx/include/__algorithm/search_n.h @@ -14,11 +14,7 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__iterator/advance.h> -#include <__iterator/concepts.h> -#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> -#include <__ranges/concepts.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> @@ -68,44 +64,60 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> __search_ } } -template +// Finds the longest suffix in [__first, __last) where each element satisfies __pred. 
+template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RAIter +__find_longest_suffix(_RAIter __first, _RAIter __last, const _ValueT& __value, _Pred& __pred, _Proj& __proj) { + while (__first != __last) { + if (!std::__invoke(__pred, std::__invoke(__proj, *--__last), __value)) { + return ++__last; + } + } + return __first; +} + +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 std::pair<_Iter, _Iter> __search_n_random_access_impl( - _Iter __first, _Sent __last, _SizeT __count, const _Type& __value, _Pred& __pred, _Proj& __proj, _DiffT __size1) { - using difference_type = typename iterator_traits<_Iter>::difference_type; + _Iter __first, _SizeT __count_in, const _Type& __value, _Pred& __pred, _Proj& __proj, _DiffT __size) { + auto __last = __first + __size; + auto __count = static_cast<_DiffT>(__count_in); + if (__count == 0) return std::make_pair(__first, __first); - if (__size1 < static_cast<_DiffT>(__count)) { - _IterOps<_AlgPolicy>::__advance_to(__first, __last); - return std::make_pair(__first, __first); - } + if (__size < __count) + return std::make_pair(__last, __last); + + // [__match_start, __match_start + __count) is the subrange which we currently check whether it only contains matching + // elements. This subrange is returned in case all the elements match. + // [__match_start, __matched_until) is the longest subrange where all elements are known to match at any given point + // in time. + // [__matched_until, __match_start + __count) is the subrange where we don't know whether the elements match. + + // This algorithm tries to expand the subrange [__match_start, __matched_until) into a range of sufficient length. + // When we fail to do that because we find a mismatching element, we move it forward to the beginning of the next + // consecutive sequence that is not known not to match. 
+ + const _Iter __try_match_until = __last - __count; + _Iter __match_start = __first; + _Iter __matched_until = __first; - const auto __s = __first + __size1 - difference_type(__count - 1); // Start of pattern match can't go beyond here while (true) { - // Find first element in sequence that matchs __value, with a mininum of loop checks - while (true) { - if (__first >= __s) { // return __last if no element matches __value - _IterOps<_AlgPolicy>::__advance_to(__first, __last); - return std::make_pair(__first, __first); - } - if (std::__invoke(__pred, std::__invoke(__proj, *__first), __value)) - break; - ++__first; - } - // *__first matches __value_, now match elements after here - auto __m = __first; - _SizeT __c(0); - while (true) { - if (++__c == __count) // If pattern exhausted, __first is the answer (works for 1 element pattern) - return std::make_pair(__first, __first + _DiffT(__count)); - ++__m; // no need to check range on __m because __s guarantees we have enough source + // There's no chance of expanding the subrange into a sequence of sufficient length, since we don't have enough + // elements in the haystack anymore. + if (__match_start > __try_match_until) + return std::make_pair(__last, __last); - // if there is a mismatch, restart with a new __first - if (!std::__invoke(__pred, std::__invoke(__proj, *__m), __value)) { - __first = __m; - ++__first; - break; - } // else there is a match, check next elements - } + auto __mismatch = std::__find_longest_suffix(__matched_until, __match_start + __count, __value, __pred, __proj); + + // If all elements in [__matched_until, __match_start + __count) match, we know that + // [__match_start, __match_start + __count) is a full sequence of matching elements, so we're done. 
+ if (__mismatch == __matched_until) + return std::make_pair(__match_start, __match_start + __count); + + // Otherwise, we have to move the [__match_start, __matched_until) subrange forward past the point where we know for + // sure a match is impossible. + __matched_until = __match_start + __count; + __match_start = __mismatch; } } @@ -119,7 +131,7 @@ template __search_n_impl(_Iter __first, _Sent __last, _DiffT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) { return std::__search_n_random_access_impl<_ClassicAlgPolicy>( - __first, __last, __count, __value, __pred, __proj, __last - __first); + __first, __count, __value, __pred, __proj, __last - __first); } template +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __sift_down(_RandomAccessIterator __first, _Compare&& __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __len, - _RandomAccessIterator __start) { + __iterator_difference_type<_RandomAccessIterator> __len, + __iterator_difference_type<_RandomAccessIterator> __start) { using _Ops = _IterOps<_AlgPolicy>; typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; // left-child of __start is at 2 * __start + 1 // right-child of __start is at 2 * __start + 2 - difference_type __child = __start - __first; + difference_type __child = __start; if (__len < 2 || (__len - 2) / 2 < __child) return; - __child = 2 * __child + 1; - _RandomAccessIterator __child_i = __first + __child; + __child = 2 * __child + 1; - if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) { + if _LIBCPP_CONSTEXPR (__assume_both_children) { + // right-child exists and is greater than left-child + __child += __comp(__first[__child], __first[__child + 1]); + } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) { // right-child exists and is greater than left-child - 
++__child_i; ++__child; } // check if we are in heap-order - if (__comp(*__child_i, *__start)) + if (__comp(__first[__child], __first[__start])) // we are, __start is larger than its largest child return; - value_type __top(_Ops::__iter_move(__start)); + value_type __top(_Ops::__iter_move(__first + __start)); do { // we are not in heap-order, swap the parent with its largest child - *__start = _Ops::__iter_move(__child_i); - __start = __child_i; + __first[__start] = _Ops::__iter_move(__first + __child); + __start = __child; if ((__len - 2) / 2 < __child) break; // recompute the child based off of the updated parent - __child = 2 * __child + 1; - __child_i = __first + __child; + __child = 2 * __child + 1; - if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) { + if _LIBCPP_CONSTEXPR (__assume_both_children) { + __child += __comp(__first[__child], __first[__child + 1]); + } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) { // right-child exists and is greater than left-child - ++__child_i; ++__child; } // check if we are in heap-order - } while (!__comp(*__child_i, __top)); - *__start = std::move(__top); + } while (!__comp(__first[__child], __top)); + __first[__start] = std::move(__top); } template diff --git a/lib/libcxx/include/__algorithm/simd_utils.h b/lib/libcxx/include/__algorithm/simd_utils.h index 47942a09e6..f73c9ea4b6 100644 --- a/lib/libcxx/include/__algorithm/simd_utils.h +++ b/lib/libcxx/include/__algorithm/simd_utils.h @@ -26,9 +26,7 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> // TODO: Find out how altivec changes things and allow vectorizations there too. -// TODO: Simplify this condition once we stop building with AppleClang 15 in the CI. 
-#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(__ALTIVEC__) && \ - !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1600) +#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(__ALTIVEC__) # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 #else # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0 @@ -116,16 +114,47 @@ template }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); } +// Load the first _Np elements, zero the rest +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi") +template +[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __partial_load(_Iter __iter) noexcept { + return [=]( + index_sequence<_LoadIndices...>, index_sequence<_ZeroIndices...>) _LIBCPP_ALWAYS_INLINE noexcept { + return _VecT{__iter[_LoadIndices]..., ((void)_ZeroIndices, 0)...}; + }(make_index_sequence<_Np>{}, make_index_sequence<__simd_vector_size_v<_VecT> - _Np>{}); +} + +// Create a vector where every elements is __val +template +[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT +__broadcast(__simd_vector_underlying_type_t<_VecT> __val) { + return [&](index_sequence<_Indices...>) { + return _VecT{((void)_Indices, __val)...}; + }(make_index_sequence<__simd_vector_size_v<_VecT>>()); +} +_LIBCPP_DIAGNOSTIC_POP + +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __any_of(__simd_vector<_Tp, _Np> __vec) noexcept { + return __builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector)); +} + template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector)); } +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __none_of(__simd_vector<_Tp, _Np> __vec) noexcept { + return !__builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector)); +} + template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) 
noexcept { using __mask_vec = __simd_vector; - // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 + // This has MSan disabled du to https://llvm.org/PR85876 auto __impl = [&](_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept { # if defined(_LIBCPP_BIG_ENDIAN) return std::min( diff --git a/lib/libcxx/include/__algorithm/specialized_algorithms.h b/lib/libcxx/include/__algorithm/specialized_algorithms.h new file mode 100644 index 0000000000..7cb94c015f --- /dev/null +++ b/lib/libcxx/include/__algorithm/specialized_algorithms.h @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H +#define _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace _Algorithm { +struct __copy {}; +struct __fill_n {}; +struct __for_each {}; +} // namespace _Algorithm + +template +struct __single_iterator; + +template +struct __iterator_pair; + +template +struct __single_range; + +// This struct allows specializing algorithms for specific arguments. This is useful when we know a more efficient +// algorithm implementation for e.g. library-defined iterators. _Alg is one of tags defined inside the _Algorithm +// namespace above. _Ranges is an essentially arbitrary subset of the arguments to the algorithm that are used for +// dispatching. This set is specific to the algorithm: look at each algorithm to see which arguments they use for +// dispatching to specialized algorithms. 
+// +// A specialization of `__specialized_algorithm` has to define `__has_algorithm` to true for the specialized algorithm +// to be used. This is intended for cases where iterators can do generic unwrapping and forward to a different +// specialization of `__specialized_algorithm`. +// +// If __has_algorithm is true, there has to be an operator() which will get called with the actual arguments to the +// algorithm. +template +struct __specialized_algorithm { + static const bool __has_algorithm = false; +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H diff --git a/lib/libcxx/include/__algorithm/stable_sort.h b/lib/libcxx/include/__algorithm/stable_sort.h index 1ca66f6a51..64c8080834 100644 --- a/lib/libcxx/include/__algorithm/stable_sort.h +++ b/lib/libcxx/include/__algorithm/stable_sort.h @@ -247,7 +247,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort( constexpr auto __default_comp = __desugars_to_v<__less_tag, _Compare, value_type, value_type >; constexpr auto __radix_sortable = __is_ordered_integer_representable_v && - is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>; + is_same_v< value_type&, __iterator_reference<_RandomAccessIterator>>; if constexpr (__default_comp && __radix_sortable) { if (__len <= __buff_size && __len >= static_cast(std::__radix_sort_min_bound()) && __len <= static_cast(std::__radix_sort_max_bound())) { diff --git a/lib/libcxx/include/__assertion_handler b/lib/libcxx/include/__assertion_handler index f115658f9f..d352405e90 100644 --- a/lib/libcxx/include/__assertion_handler +++ b/lib/libcxx/include/__assertion_handler @@ -16,6 +16,7 @@ # include <__cxx03/__verbose_trap> #else # include <__config> +# include <__log_hardening_failure> # include <__verbose_abort> # include <__verbose_trap> #endif @@ -24,14 +25,40 @@ # pragma GCC system_header #endif -#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) 
-# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message) +// Keep the old implementation that doesn't support assertion semantics for backward compatibility with the frozen C++03 +// mode. +# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message) +# else +# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message) +# endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG #else -# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message) +# if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_IGNORE +# define _LIBCPP_ASSERTION_HANDLER(message) ((void)0) -#endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_OBSERVE +# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_LOG_HARDENING_FAILURE(message) + +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE +# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message) + +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message) + +# else + +# error _LIBCPP_ASSERTION_SEMANTIC must be set to one of the following values: \ +_LIBCPP_ASSERTION_SEMANTIC_IGNORE, \ +_LIBCPP_ASSERTION_SEMANTIC_OBSERVE, \ +_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE, \ +_LIBCPP_ASSERTION_SEMANTIC_ENFORCE + +# endif // _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_IGNORE + +#endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP___ASSERTION_HANDLER diff --git a/lib/libcxx/include/__atomic/atomic.h b/lib/libcxx/include/__atomic/atomic.h index eead49dde6..02528cd964 100644 --- a/lib/libcxx/include/__atomic/atomic.h +++ b/lib/libcxx/include/__atomic/atomic.h @@ -10,7 +10,9 @@ #define _LIBCPP___ATOMIC_ATOMIC_H #include <__atomic/atomic_sync.h> 
+#include <__atomic/atomic_waitable_traits.h> #include <__atomic/check_memory_order.h> +#include <__atomic/floating_point_helper.h> #include <__atomic/is_always_lock_free.h> #include <__atomic/memory_order.h> #include <__atomic/support.h> @@ -47,10 +49,10 @@ struct __atomic_base // false static constexpr bool is_always_lock_free = __libcpp_is_always_lock_free<__cxx_atomic_impl<_Tp> >::__value; #endif - _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT { return __cxx_atomic_is_lock_free(sizeof(__cxx_atomic_impl<_Tp>)); } - _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const _NOEXCEPT { return static_cast<__atomic_base const volatile*>(this)->is_lock_free(); } _LIBCPP_HIDE_FROM_ABI void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT @@ -61,11 +63,11 @@ struct __atomic_base // false _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { std::__cxx_atomic_store(std::addressof(__a_), __d, __m); } - _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { return std::__cxx_atomic_load(std::addressof(__a_), __m); } - _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { return std::__cxx_atomic_load(std::addressof(__a_), __m); } @@ -113,22 +115,16 @@ struct __atomic_base // false } #if _LIBCPP_STD_VER >= 20 - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const - volatile _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = 
memory_order_seq_cst) const volatile _NOEXCEPT { std::__atomic_wait(*this, __v, __m); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void - wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { std::__atomic_wait(*this, __v, __m); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { - std::__atomic_notify_one(*this); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { - std::__atomic_notify_all(*this); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } #endif // _LIBCPP_STD_VER >= 20 #if _LIBCPP_STD_VER >= 20 @@ -205,12 +201,15 @@ struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> { _LIBCPP_HIDE_FROM_ABI _Tp operator^=(_Tp __op) _NOEXCEPT { return fetch_xor(__op) ^ __op; } }; +#if _LIBCPP_STD_VER >= 20 // Here we need _IsIntegral because the default template argument is not enough // e.g __atomic_base is __atomic_base, which inherits from // __atomic_base and the caller of the wait function is // __atomic_base. 
So specializing __atomic_base<_Tp> does not work template struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > { + using __value_type _LIBCPP_NODEBUG = _Tp; + static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) { return __a.load(__order); } @@ -231,6 +230,8 @@ struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > { } }; +#endif // _LIBCPP_STD_VER >= 20 + template struct __check_atomic_mandates { using type _LIBCPP_NODEBUG = _Tp; @@ -324,50 +325,26 @@ struct atomic<_Tp*> : public __atomic_base<_Tp*> { atomic& operator=(const atomic&) volatile = delete; }; +#if _LIBCPP_STD_VER >= 20 template struct __atomic_waitable_traits > : __atomic_waitable_traits<__atomic_base<_Tp> > {}; -#if _LIBCPP_STD_VER >= 20 template requires is_floating_point_v<_Tp> struct atomic<_Tp> : __atomic_base<_Tp> { private: - _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_fp80_long_double() { - // Only x87-fp80 long double has 64-bit mantissa - return __LDBL_MANT_DIG__ == 64 && std::is_same_v<_Tp, long double>; - } - - _LIBCPP_HIDE_FROM_ABI static constexpr bool __has_rmw_builtin() { -# ifndef _LIBCPP_COMPILER_CLANG_BASED - return false; -# else - // The builtin __cxx_atomic_fetch_add errors during compilation for - // long double on platforms with fp80 format. - // For more details, see - // lib/Sema/SemaChecking.cpp function IsAllowedValueType - // LLVM Parser does not allow atomicrmw with x86_fp80 type. 
- // if (ValType->isSpecificBuiltinType(BuiltinType::LongDouble) && - // &Context.getTargetInfo().getLongDoubleFormat() == - // &llvm::APFloat::x87DoubleExtended()) - // For more info - // https://github.com/llvm/llvm-project/issues/68602 - // https://reviews.llvm.org/D53965 - return !__is_fp80_long_double(); -# endif - } - template _LIBCPP_HIDE_FROM_ABI static _Tp __rmw_op(_This&& __self, _Tp __operand, memory_order __m, _Operation __operation, _BuiltinOp __builtin_op) { - if constexpr (__has_rmw_builtin()) { + if constexpr (std::__has_rmw_builtin<_Tp>()) { return __builtin_op(std::addressof(std::forward<_This>(__self).__a_), __operand, __m); } else { _Tp __old = __self.load(memory_order_relaxed); _Tp __new = __operation(__old, __operand); while (!__self.compare_exchange_weak(__old, __new, __m, memory_order_relaxed)) { # ifdef _LIBCPP_COMPILER_CLANG_BASED - if constexpr (__is_fp80_long_double()) { - // https://github.com/llvm/llvm-project/issues/47978 + if constexpr (std::__is_fp80_long_double<_Tp>()) { + // https://llvm.org/PR47978 // clang bug: __old is not updated on failure for atomic::compare_exchange_weak // Note __old = __self.load(memory_order_relaxed) will not work std::__cxx_atomic_load_inplace(std::addressof(__self.__a_), std::addressof(__old), memory_order_relaxed); @@ -462,12 +439,12 @@ public: // atomic_is_lock_free template -_LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) _NOEXCEPT { return __o->is_lock_free(); } template -_LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const atomic<_Tp>* __o) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const atomic<_Tp>* __o) _NOEXCEPT { return __o->is_lock_free(); } @@ -516,25 +493,25 @@ atomic_store_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, me // atomic_load template -_LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const 
volatile atomic<_Tp>* __o) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const volatile atomic<_Tp>* __o) _NOEXCEPT { return __o->load(); } template -_LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const atomic<_Tp>* __o) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const atomic<_Tp>* __o) _NOEXCEPT { return __o->load(); } // atomic_load_explicit template -_LIBCPP_HIDE_FROM_ABI _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) _NOEXCEPT - _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp +atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { return __o->load(__m); } template -_LIBCPP_HIDE_FROM_ABI _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) _NOEXCEPT +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { return __o->load(__m); } @@ -642,28 +619,27 @@ _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_strong_explicit( // atomic_wait template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI void atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) _NOEXCEPT { return __o->wait(__v); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) _NOEXCEPT { +_LIBCPP_HIDE_FROM_ABI void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) _NOEXCEPT { return __o->wait(__v); } // atomic_wait_explicit template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI void atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { return __o->wait(__v, __m); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void 
+_LIBCPP_HIDE_FROM_ABI void atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { return __o->wait(__v, __m); @@ -672,22 +648,22 @@ atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __ // atomic_notify_one template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } // atomic_notify_all template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } diff --git a/lib/libcxx/include/__atomic/atomic_flag.h b/lib/libcxx/include/__atomic/atomic_flag.h index 5cc6fb0c55..42864c869d 100644 --- a/lib/libcxx/include/__atomic/atomic_flag.h +++ b/lib/libcxx/include/__atomic/atomic_flag.h @@ -10,6 +10,7 @@ #define _LIBCPP___ATOMIC_ATOMIC_FLAG_H #include <__atomic/atomic_sync.h> +#include <__atomic/atomic_waitable_traits.h> #include <__atomic/contention_t.h> #include <__atomic/memory_order.h> #include <__atomic/support.h> @@ -49,22 +50,16 @@ struct atomic_flag { } #if _LIBCPP_STD_VER >= 20 - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const - volatile _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile 
_NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void - wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { - std::__atomic_notify_one(*this); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { - std::__atomic_notify_all(*this); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); } + _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } #endif #if _LIBCPP_STD_VER >= 20 @@ -80,8 +75,11 @@ struct atomic_flag { atomic_flag& operator=(const atomic_flag&) volatile = delete; }; +#if _LIBCPP_STD_VER >= 20 template <> struct __atomic_waitable_traits { + using __value_type _LIBCPP_NODEBUG = _LIBCPP_ATOMIC_FLAG_TYPE; + static _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATOMIC_FLAG_TYPE __atomic_load(const atomic_flag& __a, memory_order __order) { return std::__cxx_atomic_load(&__a.__a_, __order); } @@ -101,6 +99,7 @@ struct __atomic_waitable_traits { return std::addressof(__a.__a_); } }; +#endif // _LIBCPP_STD_VER >= 20 inline _LIBCPP_HIDE_FROM_ABI bool atomic_flag_test(const volatile atomic_flag* __o) _NOEXCEPT { return __o->test(); } @@ -143,43 +142,26 @@ inline _LIBCPP_HIDE_FROM_ABI void 
atomic_flag_clear_explicit(atomic_flag* __o, m } #if _LIBCPP_STD_VER >= 20 -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT { +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT { - __o->wait(__v); -} +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT { - __o->notify_one(); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { - __o->notify_one(); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT { - __o->notify_all(); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { - __o->notify_all(); -} +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } +inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } +inline _LIBCPP_HIDE_FROM_ABI void 
atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__atomic/atomic_ref.h b/lib/libcxx/include/__atomic/atomic_ref.h index b5493662c5..69edbfe6ec 100644 --- a/lib/libcxx/include/__atomic/atomic_ref.h +++ b/lib/libcxx/include/__atomic/atomic_ref.h @@ -19,7 +19,9 @@ #include <__assert> #include <__atomic/atomic_sync.h> +#include <__atomic/atomic_waitable_traits.h> #include <__atomic/check_memory_order.h> +#include <__atomic/floating_point_helper.h> #include <__atomic/memory_order.h> #include <__atomic/to_gcc_order.h> #include <__concepts/arithmetic.h> @@ -121,7 +123,9 @@ public: static constexpr bool is_always_lock_free = __atomic_always_lock_free(sizeof(_Tp), std::addressof(__get_aligner_instance::__instance)); - _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(_Tp), __ptr_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept { + return __atomic_is_lock_free(sizeof(_Tp), __ptr_); + } _LIBCPP_HIDE_FROM_ABI void store(_Tp __desired, memory_order __order = memory_order::seq_cst) const noexcept _LIBCPP_CHECK_STORE_MEMORY_ORDER(__order) { @@ -136,7 +140,7 @@ public: return __desired; } - _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __order = memory_order::seq_cst) const noexcept + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __order = memory_order::seq_cst) const noexcept _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__order) { _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( __order == memory_order::relaxed || __order == memory_order::consume || __order == memory_order::acquire || @@ -219,6 +223,9 @@ public: } _LIBCPP_HIDE_FROM_ABI void notify_one() const noexcept { std::__atomic_notify_one(*this); } _LIBCPP_HIDE_FROM_ABI void notify_all() const noexcept { std::__atomic_notify_all(*this); } +# if _LIBCPP_STD_VER >= 26 + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* address() const noexcept { return 
__ptr_; } +# endif protected: using _Aligned_Tp [[__gnu__::__aligned__(required_alignment), __gnu__::__nodebug__]] = _Tp; @@ -229,6 +236,8 @@ protected: template struct __atomic_waitable_traits<__atomic_ref_base<_Tp>> { + using __value_type _LIBCPP_NODEBUG = _Tp; + static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_ref_base<_Tp>& __a, memory_order __order) { return __a.load(__order); } @@ -322,20 +331,28 @@ struct atomic_ref<_Tp> : public __atomic_ref_base<_Tp> { atomic_ref& operator=(const atomic_ref&) = delete; _LIBCPP_HIDE_FROM_ABI _Tp fetch_add(_Tp __arg, memory_order __order = memory_order_seq_cst) const noexcept { - _Tp __old = this->load(memory_order_relaxed); - _Tp __new = __old + __arg; - while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) { - __new = __old + __arg; + if constexpr (std::__has_rmw_builtin<_Tp>()) { + return __atomic_fetch_add(this->__ptr_, __arg, std::__to_gcc_order(__order)); + } else { + _Tp __old = this->load(memory_order_relaxed); + _Tp __new = __old + __arg; + while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) { + __new = __old + __arg; + } + return __old; } - return __old; } _LIBCPP_HIDE_FROM_ABI _Tp fetch_sub(_Tp __arg, memory_order __order = memory_order_seq_cst) const noexcept { - _Tp __old = this->load(memory_order_relaxed); - _Tp __new = __old - __arg; - while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) { - __new = __old - __arg; + if constexpr (std::__has_rmw_builtin<_Tp>()) { + return __atomic_fetch_sub(this->__ptr_, __arg, std::__to_gcc_order(__order)); + } else { + _Tp __old = this->load(memory_order_relaxed); + _Tp __new = __old - __arg; + while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) { + __new = __old - __arg; + } + return __old; } - return __old; } _LIBCPP_HIDE_FROM_ABI _Tp operator+=(_Tp __arg) const noexcept { return fetch_add(__arg) + __arg; } diff --git 
a/lib/libcxx/include/__atomic/atomic_sync.h b/lib/libcxx/include/__atomic/atomic_sync.h index 0dae448d64..1234cdea50 100644 --- a/lib/libcxx/include/__atomic/atomic_sync.h +++ b/lib/libcxx/include/__atomic/atomic_sync.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ATOMIC_ATOMIC_SYNC_H #define _LIBCPP___ATOMIC_ATOMIC_SYNC_H +#include <__atomic/atomic_waitable_traits.h> #include <__atomic/contention_t.h> #include <__atomic/memory_order.h> #include <__atomic/to_gcc_order.h> @@ -19,6 +20,7 @@ #include <__type_traits/conjunction.h> #include <__type_traits/decay.h> #include <__type_traits/invoke.h> +#include <__type_traits/is_same.h> #include <__type_traits/void_t.h> #include <__utility/declval.h> #include @@ -29,50 +31,89 @@ _LIBCPP_BEGIN_NAMESPACE_STD -// The customisation points to enable the following functions: -// - __atomic_wait -// - __atomic_wait_unless -// - __atomic_notify_one -// - __atomic_notify_all -// Note that std::atomic::wait was back-ported to C++03 -// The below implementations look ugly to support C++03 -template -struct __atomic_waitable_traits { - template - static void __atomic_load(_AtomicWaitable&&, memory_order) = delete; - - template - static void __atomic_contention_address(_AtomicWaitable&&) = delete; -}; - -template -struct __atomic_waitable : false_type {}; - -template -struct __atomic_waitable< _Tp, - __void_t >::__atomic_load( - std::declval(), std::declval())), - decltype(__atomic_waitable_traits<__decay_t<_Tp> >::__atomic_contention_address( - std::declval()))> > : true_type {}; - #if _LIBCPP_STD_VER >= 20 # if _LIBCPP_HAS_THREADS -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT; -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT; -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t -__libcpp_atomic_monitor(void const volatile*) _NOEXCEPT; -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void 
-__libcpp_atomic_wait(void const volatile*, __cxx_contention_t) _NOEXCEPT; +# if !_LIBCPP_AVAILABILITY_HAS_NEW_SYNC -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void -__cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*) _NOEXCEPT; -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void -__cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*) _NOEXCEPT; -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t +// old dylib interface kept for backwards compatibility +_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT; +_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT; +_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile*) _NOEXCEPT; +_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(void const volatile*, __cxx_contention_t) _NOEXCEPT; + +_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*) _NOEXCEPT; +_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*) _NOEXCEPT; +_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*) _NOEXCEPT; -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void +_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t) _NOEXCEPT; +# endif // !_LIBCPP_AVAILABILITY_HAS_NEW_SYNC + +// new dylib interface + +// return the global contention state's current value for the address +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t +__atomic_monitor_global(void const* __address) _NOEXCEPT; + +// wait on the global contention state to be changed from the given value for the address +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void +__atomic_wait_global_table(void const* __address, __cxx_contention_t __monitor_value) _NOEXCEPT; + +// notify one waiter 
waiting on the global contention state for the address +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_global_table(void const*) _NOEXCEPT; + +// notify all waiters waiting on the global contention state for the address +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_global_table(void const*) _NOEXCEPT; + +// wait on the address directly with the native platform wait +template +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void +__atomic_wait_native(void const* __address, void const* __old_value) _NOEXCEPT; + +// notify one waiter waiting on the address directly with the native platform wait +template +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_native(const void*) _NOEXCEPT; + +// notify all waiters waiting on the address directly with the native platform wait +template +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_native(const void*) _NOEXCEPT; + +# if _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + +template +struct __atomic_wait_backoff_impl { + const _AtomicWaitable& __a_; + _Poll __poll_; + memory_order __order_; + + using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; + using __value_type _LIBCPP_NODEBUG = typename __waitable_traits::__value_type; + + _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const { + if (__elapsed > chrono::microseconds(4)) { + auto __contention_address = const_cast( + static_cast(__waitable_traits::__atomic_contention_address(__a_))); + + if constexpr (__has_native_atomic_wait<__value_type>) { + auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_); + if (__poll_(__atomic_value)) + return __backoff_results::__poll_success; + std::__atomic_wait_native(__contention_address, std::addressof(__atomic_value)); + } else { + __cxx_contention_t __monitor_val = std::__atomic_monitor_global(__contention_address); 
+ auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_); + if (__poll_(__atomic_value)) + return __backoff_results::__poll_success; + std::__atomic_wait_global_table(__contention_address, __monitor_val); + } + } else { + } // poll + return __backoff_results::__continue_poll; + } +}; + +# else // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC template struct __atomic_wait_backoff_impl { @@ -82,7 +123,6 @@ struct __atomic_wait_backoff_impl { using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __update_monitor_val_and_poll(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor_val) const { // In case the contention type happens to be __cxx_atomic_contention_t, i.e. __cxx_atomic_impl, @@ -95,7 +135,6 @@ struct __atomic_wait_backoff_impl { return __poll_(__monitor_val); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __update_monitor_val_and_poll(void const volatile* __contention_address, __cxx_contention_t& __monitor_val) const { // In case the contention type is anything else, platform wait is monitoring a __cxx_atomic_contention_t @@ -105,20 +144,21 @@ struct __atomic_wait_backoff_impl { return __poll_(__current_val); } - _LIBCPP_AVAILABILITY_SYNC - _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const { + _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const { if (__elapsed > chrono::microseconds(4)) { auto __contention_address = __waitable_traits::__atomic_contention_address(__a_); __cxx_contention_t __monitor_val; if (__update_monitor_val_and_poll(__contention_address, __monitor_val)) - return true; + return __backoff_results::__poll_success; std::__libcpp_atomic_wait(__contention_address, __monitor_val); } else { } // poll - return false; + return __backoff_results::__continue_poll; } }; +# endif // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + // The semantics of this function are similar 
to `atomic`'s // `.wait(T old, std::memory_order order)`, but instead of having a hardcoded // predicate (is the loaded value unequal to `old`?), the predicate function is @@ -128,9 +168,8 @@ struct __atomic_wait_backoff_impl { // `false`, it must set the argument to its current understanding of the atomic // value. The predicate function must not return `false` spuriously. template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -__atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& __poll) { - static_assert(__atomic_waitable<_AtomicWaitable>::value, ""); +_LIBCPP_HIDE_FROM_ABI void __atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& __poll) { + static_assert(__atomic_waitable<_AtomicWaitable>); __atomic_wait_backoff_impl<_AtomicWaitable, __decay_t<_Poll> > __backoff_fn = {__a, __poll, __order}; std::__libcpp_thread_poll_with_backoff( /* poll */ @@ -141,18 +180,52 @@ __atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& _ /* backoff */ __backoff_fn); } +# if _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) { - static_assert(__atomic_waitable<_AtomicWaitable>::value, ""); +_LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) { + static_assert(__atomic_waitable<_AtomicWaitable>); + using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type; + using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; + auto __contention_address = + const_cast(static_cast(__waitable_traits::__atomic_contention_address(__a))); + if constexpr (__has_native_atomic_wait<__value_type>) { + std::__atomic_notify_one_native(__contention_address); + } else { + std::__atomic_notify_one_global_table(__contention_address); + } +} + +template +_LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const 
_AtomicWaitable& __a) { + static_assert(__atomic_waitable<_AtomicWaitable>); + using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type; + using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; + auto __contention_address = + const_cast(static_cast(__waitable_traits::__atomic_contention_address(__a))); + if constexpr (__has_native_atomic_wait<__value_type>) { + std::__atomic_notify_all_native(__contention_address); + } else { + std::__atomic_notify_all_global_table(__contention_address); + } +} + +# else // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + +template +_LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) { + static_assert(__atomic_waitable<_AtomicWaitable>); std::__cxx_atomic_notify_one(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a)); } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) { - static_assert(__atomic_waitable<_AtomicWaitable>::value, ""); +_LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) { + static_assert(__atomic_waitable<_AtomicWaitable>); std::__cxx_atomic_notify_all(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a)); } +# endif + # else // _LIBCPP_HAS_THREADS template @@ -180,9 +253,8 @@ _LIBCPP_HIDE_FROM_ABI bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp c } template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -__atomic_wait(_AtomicWaitable& __a, _Tp __val, memory_order __order) { - static_assert(__atomic_waitable<_AtomicWaitable>::value, ""); +_LIBCPP_HIDE_FROM_ABI void __atomic_wait(_AtomicWaitable& __a, _Tp __val, memory_order __order) { + static_assert(__atomic_waitable<_AtomicWaitable>); std::__atomic_wait_unless(__a, __order, [&](_Tp const& __current) { return !std::__cxx_nonatomic_compare_equal(__current, __val); }); diff --git 
a/lib/libcxx/include/__atomic/atomic_sync_timed.h b/lib/libcxx/include/__atomic/atomic_sync_timed.h new file mode 100644 index 0000000000..7daff73db7 --- /dev/null +++ b/lib/libcxx/include/__atomic/atomic_sync_timed.h @@ -0,0 +1,144 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ATOMIC_ATOMIC_SYNC_TIMED_H +#define _LIBCPP___ATOMIC_ATOMIC_SYNC_TIMED_H + +#include <__atomic/atomic_waitable_traits.h> +#include <__atomic/contention_t.h> +#include <__atomic/memory_order.h> +#include <__atomic/to_gcc_order.h> +#include <__chrono/duration.h> +#include <__config> +#include <__memory/addressof.h> +#include <__thread/poll_with_backoff.h> +#include <__thread/timed_backoff_policy.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/decay.h> +#include <__type_traits/has_unique_object_representation.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_same.h> +#include <__type_traits/is_trivially_copyable.h> +#include <__type_traits/void_t.h> +#include <__utility/declval.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 +# if _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + +_LIBCPP_AVAILABILITY_NEW_SYNC +_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __atomic_monitor_global(void const* __address) _NOEXCEPT; + +// wait on the global contention state to be changed from the given value for the address +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_global_table_with_timeout( + void const* __address, __cxx_contention_t __monitor_value, uint64_t 
__timeout_ns) _NOEXCEPT; + +// wait on the address directly with the native platform wait +template +_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void +__atomic_wait_native_with_timeout(void const* __address, void const* __old_value, uint64_t __timeout_ns) _NOEXCEPT; + +template +struct __atomic_wait_timed_backoff_impl { + const _AtomicWaitable& __a_; + _Poll __poll_; + memory_order __order_; + chrono::duration<_Rep, _Period> __rel_time_; + + using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; + using __value_type _LIBCPP_NODEBUG = typename __waitable_traits::__value_type; + + _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const { + if (__elapsed > chrono::microseconds(4)) { + auto __contention_address = const_cast( + static_cast(__waitable_traits::__atomic_contention_address(__a_))); + + uint64_t __timeout_ns = + static_cast((chrono::duration_cast(__rel_time_) - __elapsed).count()); + + if constexpr (__has_native_atomic_wait<__value_type>) { + auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_); + if (__poll_(__atomic_value)) + return __backoff_results::__poll_success; + std::__atomic_wait_native_with_timeout( + __contention_address, std::addressof(__atomic_value), __timeout_ns); + } else { + __cxx_contention_t __monitor_val = std::__atomic_monitor_global(__contention_address); + auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_); + if (__poll_(__atomic_value)) + return __backoff_results::__poll_success; + std::__atomic_wait_global_table_with_timeout(__contention_address, __monitor_val, __timeout_ns); + } + } else { + } // poll + return __backoff_results::__continue_poll; + } +}; + +// The semantics of this function are similar to `atomic`'s +// `.wait(T old, std::memory_order order)` with a timeout, but instead of having a hardcoded +// predicate (is the loaded value unequal to `old`?), the predicate function is +// specified as an 
argument. The loaded value is given as an in-out argument to +// the predicate. If the predicate function returns `true`, +// `__atomic_wait_unless_with_timeout` will return. If the predicate function returns +// `false`, it must set the argument to its current understanding of the atomic +// value. The predicate function must not return `false` spuriously. +template +_LIBCPP_HIDE_FROM_ABI bool __atomic_wait_unless_with_timeout( + const _AtomicWaitable& __a, + memory_order __order, + _Poll&& __poll, + chrono::duration<_Rep, _Period> const& __rel_time) { + static_assert(__atomic_waitable<_AtomicWaitable>, ""); + __atomic_wait_timed_backoff_impl<_AtomicWaitable, __decay_t<_Poll>, _Rep, _Period> __backoff_fn = { + __a, __poll, __order, __rel_time}; + auto __poll_result = std::__libcpp_thread_poll_with_backoff( + /* poll */ + [&]() { + auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a, __order); + return __poll(__current_val); + }, + /* backoff */ __backoff_fn, + __rel_time); + + return __poll_result == __poll_with_backoff_results::__poll_success; +} + +# elif _LIBCPP_HAS_THREADS // _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + +template +_LIBCPP_HIDE_FROM_ABI bool __atomic_wait_unless_with_timeout( + const _AtomicWaitable& __a, + memory_order __order, + _Poll&& __poll, + chrono::duration<_Rep, _Period> const& __rel_time) { + auto __res = std::__libcpp_thread_poll_with_backoff( + /* poll */ + [&]() { + auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a, __order); + return __poll(__current_val); + }, + /* backoff */ __libcpp_timed_backoff_policy(), + __rel_time); + return __res == __poll_with_backoff_results::__poll_success; +} + +# endif // _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_NEW_SYNC + +#endif // C++20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ATOMIC_ATOMIC_SYNC_TIMED_H diff --git a/lib/libcxx/include/__atomic/atomic_waitable_traits.h 
b/lib/libcxx/include/__atomic/atomic_waitable_traits.h new file mode 100644 index 0000000000..849c33122b --- /dev/null +++ b/lib/libcxx/include/__atomic/atomic_waitable_traits.h @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ATOMIC_ATOMIC_WAITABLE_TRAITS_H +#define _LIBCPP___ATOMIC_ATOMIC_WAITABLE_TRAITS_H + +#include <__atomic/contention_t.h> +#include <__atomic/memory_order.h> +#include <__config> +#include <__type_traits/decay.h> +#include <__type_traits/has_unique_object_representation.h> +#include <__type_traits/is_same.h> +#include <__type_traits/is_trivially_copyable.h> +#include <__type_traits/void_t.h> +#include <__utility/declval.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +// The customisation points to enable the following functions: +// - __atomic_wait +// - __atomic_wait_unless +// - __atomic_notify_one +// - __atomic_notify_all +template +struct __atomic_waitable_traits { + using __value_type _LIBCPP_NODEBUG = void; + + template + static void __atomic_load(_AtomicWaitable&&, memory_order) = delete; + + template + static void __atomic_contention_address(_AtomicWaitable&&) = delete; +}; + +template +concept __atomic_waitable = requires(const _Tp __t, memory_order __order) { + typename __atomic_waitable_traits<__decay_t<_Tp> >::__value_type; + { __atomic_waitable_traits<__decay_t<_Tp> >::__atomic_load(__t, __order) }; + { __atomic_waitable_traits<__decay_t<_Tp> >::__atomic_contention_address(__t) }; +}; + +# ifdef __linux__ +# define 
_LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(4) +# elif defined(__APPLE__) +# define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) \ + _APPLY(4) \ + _APPLY(8) +# elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8 +# define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(8) +# elif defined(_WIN32) +# define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(8) +# else +# define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(sizeof(__cxx_contention_t)) +# endif // __linux__ + +// concepts defines the types are supported natively by the platform's wait + +# if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE) + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool __has_native_atomic_wait_impl() { + if (alignof(_Tp) % sizeof(_Tp) != 0) + return false; + switch (sizeof(_Tp)) { +# define _LIBCPP_MAKE_CASE(n) \ + case n: \ + return true; + _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_LIBCPP_MAKE_CASE) + default: + return false; +# undef _LIBCPP_MAKE_CASE + }; +} + +template +concept __has_native_atomic_wait = + has_unique_object_representations_v<_Tp> && is_trivially_copyable_v<_Tp> && + std::__has_native_atomic_wait_impl<_Tp>(); + +# else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE + +template +concept __has_native_atomic_wait = is_same_v<_Tp, __cxx_contention_t>; + +# endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE + +#endif // C++20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ATOMIC_ATOMIC_WAITABLE_TRAITS_H diff --git a/lib/libcxx/include/__atomic/contention_t.h b/lib/libcxx/include/__atomic/contention_t.h index 5b42a0125f..b7e370439e 100644 --- a/lib/libcxx/include/__atomic/contention_t.h +++ b/lib/libcxx/include/__atomic/contention_t.h @@ -19,11 +19,35 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__)) +// The original definition of `__cxx_contention_t` seemed a bit arbitrary. 
+// When we enable the _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE ABI, +// use definitions that are based on what the underlying platform supports +// instead. +#if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE) + +# ifdef __linux__ using __cxx_contention_t _LIBCPP_NODEBUG = int32_t; -#else +# elif defined(__APPLE__) using __cxx_contention_t _LIBCPP_NODEBUG = int64_t; -#endif // __linux__ || (_AIX && !__64BIT__) +# elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8 +using __cxx_contention_t _LIBCPP_NODEBUG = int64_t; +# elif defined(_AIX) && !defined(__64BIT__) +using __cxx_contention_t _LIBCPP_NODEBUG = int32_t; +# elif defined(_WIN32) +using __cxx_contention_t _LIBCPP_NODEBUG = int64_t; +# else +using __cxx_contention_t _LIBCPP_NODEBUG = int64_t; +# endif // __linux__ + +#else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE + +# if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__)) +using __cxx_contention_t _LIBCPP_NODEBUG = int32_t; +# else +using __cxx_contention_t _LIBCPP_NODEBUG = int64_t; +# endif // __linux__ || (_AIX && !__64BIT__) + +#endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE using __cxx_atomic_contention_t _LIBCPP_NODEBUG = __cxx_atomic_impl<__cxx_contention_t>; diff --git a/lib/libcxx/include/__atomic/floating_point_helper.h b/lib/libcxx/include/__atomic/floating_point_helper.h new file mode 100644 index 0000000000..8762ec234b --- /dev/null +++ b/lib/libcxx/include/__atomic/floating_point_helper.h @@ -0,0 +1,55 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ATOMIC_FLOATING_POINT_HELPER_H +#define _LIBCPP___ATOMIC_FLOATING_POINT_HELPER_H + +#include <__config> +#include <__type_traits/is_floating_point.h> +#include <__type_traits/is_same.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool __is_fp80_long_double() { + // Only x87-fp80 long double has 64-bit mantissa + return __LDBL_MANT_DIG__ == 64 && std::is_same_v<_Tp, long double>; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool __has_rmw_builtin() { + static_assert(std::is_floating_point_v<_Tp>); +# ifndef _LIBCPP_COMPILER_CLANG_BASED + return false; +# else + // The builtin __cxx_atomic_fetch_add errors during compilation for + // long double on platforms with fp80 format. + // For more details, see + // lib/Sema/SemaChecking.cpp function IsAllowedValueType + // LLVM Parser does not allow atomicrmw with x86_fp80 type. 
+ // if (ValType->isSpecificBuiltinType(BuiltinType::LongDouble) && + // &Context.getTargetInfo().getLongDoubleFormat() == + // &llvm::APFloat::x87DoubleExtended()) + // For more info + // https://llvm.org/PR68602 + // https://reviews.llvm.org/D53965 + return !std::__is_fp80_long_double<_Tp>(); +# endif +} + +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ATOMIC_FLOATING_POINT_HELPER_H diff --git a/lib/libcxx/include/__bit/countl.h b/lib/libcxx/include/__bit/countl.h index 0759140208..c95828f58d 100644 --- a/lib/libcxx/include/__bit/countl.h +++ b/lib/libcxx/include/__bit/countl.h @@ -24,7 +24,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { - static_assert(__is_unsigned_integer_v<_Tp>, "__countl_zero requires an unsigned integer type"); return __builtin_clzg(__t, numeric_limits<_Tp>::digits); } @@ -37,7 +36,7 @@ template <__unsigned_integer _Tp> template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countl_one(_Tp __t) noexcept { - return __t != numeric_limits<_Tp>::max() ? std::countl_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits; + return std::countl_zero(static_cast<_Tp>(~__t)); } #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__bit/countr.h b/lib/libcxx/include/__bit/countr.h index f6c98695d3..16f689d6da 100644 --- a/lib/libcxx/include/__bit/countr.h +++ b/lib/libcxx/include/__bit/countr.h @@ -24,7 +24,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT { - static_assert(__is_unsigned_integer_v<_Tp>, "__countr_zero only works with unsigned types"); return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); } @@ -37,7 +36,7 @@ template <__unsigned_integer _Tp> template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept { - return __t != numeric_limits<_Tp>::max() ? 
std::countr_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits; + return std::countr_zero(static_cast<_Tp>(~__t)); } #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__bit/has_single_bit.h b/lib/libcxx/include/__bit/has_single_bit.h index b43e69323e..c49c518f2b 100644 --- a/lib/libcxx/include/__bit/has_single_bit.h +++ b/lib/libcxx/include/__bit/has_single_bit.h @@ -25,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept { - return __t != 0 && (((__t & (__t - 1)) == 0)); + return __builtin_popcountg(__t) == 1; } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__bit/popcount.h b/lib/libcxx/include/__bit/popcount.h index 8d9ba09938..d104c8e8f0 100644 --- a/lib/libcxx/include/__bit/popcount.h +++ b/lib/libcxx/include/__bit/popcount.h @@ -23,7 +23,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount(_Tp __t) _NOEXCEPT { - static_assert(__is_unsigned_integer_v<_Tp>, "__popcount only works with unsigned types"); return __builtin_popcountg(__t); } diff --git a/lib/libcxx/include/__bit/rotate.h b/lib/libcxx/include/__bit/rotate.h index c6f34bdaf6..fde9058887 100644 --- a/lib/libcxx/include/__bit/rotate.h +++ b/lib/libcxx/include/__bit/rotate.h @@ -22,46 +22,35 @@ _LIBCPP_BEGIN_NAMESPACE_STD // Writing two full functions for rotl and rotr makes it easier for the compiler // to optimize the code. On x86 this function becomes the ROL instruction and // the rotr function becomes the ROR instruction. 
-template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT { - static_assert(__is_unsigned_integer_v<_Tp>, "__rotl requires an unsigned integer type"); - const int __n = numeric_limits<_Tp>::digits; - int __r = __s % __n; - - if (__r == 0) - return __x; - - if (__r > 0) - return (__x << __r) | (__x >> (__n - __r)); - - return (__x >> -__r) | (__x << (__n + __r)); -} - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT { - static_assert(__is_unsigned_integer_v<_Tp>, "__rotr requires an unsigned integer type"); - const int __n = numeric_limits<_Tp>::digits; - int __r = __s % __n; - - if (__r == 0) - return __x; - - if (__r > 0) - return (__x >> __r) | (__x << (__n - __r)); - - return (__x << -__r) | (__x >> (__n + __r)); -} #if _LIBCPP_STD_VER >= 20 template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp rotl(_Tp __t, int __cnt) noexcept { - return std::__rotl(__t, __cnt); + const int __n = numeric_limits<_Tp>::digits; + int __r = __cnt % __n; + + if (__r == 0) + return __t; + + if (__r > 0) + return (__t << __r) | (__t >> (__n - __r)); + + return (__t >> -__r) | (__t << (__n + __r)); } template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp rotr(_Tp __t, int __cnt) noexcept { - return std::__rotr(__t, __cnt); + const int __n = numeric_limits<_Tp>::digits; + int __r = __cnt % __n; + + if (__r == 0) + return __t; + + if (__r > 0) + return (__t >> __r) | (__t << (__n - __r)); + + return (__t << -__r) | (__t >> (__n + __r)); } #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__bit_reference b/lib/libcxx/include/__bit_reference index a3e6defd40..8daf3a2baa 100644 --- a/lib/libcxx/include/__bit_reference +++ b/lib/libcxx/include/__bit_reference @@ -15,8 +15,10 @@ #include <__algorithm/copy_backward.h> #include <__algorithm/copy_n.h> #include <__algorithm/equal.h> +#include <__algorithm/fill_n.h> #include 
<__algorithm/min.h> #include <__algorithm/rotate.h> +#include <__algorithm/specialized_algorithms.h> #include <__algorithm/swap_ranges.h> #include <__assert> #include <__bit/countr.h> @@ -137,7 +139,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator bool() const _NOEXCEPT { return static_cast(*__seg_ & __mask_); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool operator~() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool operator~() const _NOEXCEPT { return !static_cast(*this); } @@ -307,6 +309,15 @@ public: { } +#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_BIT_ITERATOR + template = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator(const __bit_iterator<_Cp, false>& __it) _NOEXCEPT + : __seg_(__it.__seg_), + __ctz_(__it.__ctz_) {} + + _LIBCPP_HIDE_FROM_ABI __bit_iterator(const __bit_iterator&) = default; + _LIBCPP_HIDE_FROM_ABI __bit_iterator& operator=(const __bit_iterator&) = default; +#else // When _IsConst=false, this is the copy constructor. // It is non-trivial. Making it trivial would break ABI. 
// When _IsConst=true, this is a converting constructor; @@ -327,6 +338,7 @@ public: __ctz_ = __it.__ctz_; return *this; } +#endif _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator*() const _NOEXCEPT { _LIBCPP_ASSERT_INTERNAL(__ctz_ < __bits_per_word, "Dereferencing an invalid __bit_iterator."); @@ -467,20 +479,6 @@ private: template friend struct __bit_array; - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend void - __fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n); - - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_aligned( - __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned( - __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> > - __copy_impl::operator()( - __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result) const; template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); @@ -511,10 +509,20 @@ private: bool _IsConst1, bool _IsConst2, class _BinaryPredicate, - __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> > + class _Proj1, + class _Proj2, + __enable_if_t<__is_identity<_Proj1>::value && __is_identity<_Proj2>::value && + __desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, + int> > _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl( - __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate); - template , + __bit_iterator<_Dp, 
_IsConst1>, + __bit_iterator<_Dp, _IsConst2>, + _BinaryPredicate, + _Proj1&, + _Proj2&); + template friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __count_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type); + + template + friend struct __specialized_algorithm; +}; + +template +struct __specialized_algorithm<_Algorithm::__fill_n, __single_iterator<__bit_iterator<_Cp, false> > > { + static const bool __has_algorithm = true; + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static void + __impl(__bit_iterator<_Cp, false> __first, typename __size_difference_type_traits<_Cp>::size_type __n) { + using _It = __bit_iterator<_Cp, false>; + using __storage_type = typename _It::__storage_type; + + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = std::min(__clz_f, __n); + std::__fill_masked_range(std::__to_address(__first.__seg_), __clz_f - __dn, __first.__ctz_, _FillVal); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + __storage_type __nw = __n / __bits_per_word; + std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? 
static_cast<__storage_type>(-1) : 0); + __n -= __nw * __bits_per_word; + // do last partial word + if (__n > 0) { + __first.__seg_ += __nw; + std::__fill_masked_range(std::__to_address(__first.__seg_), __bits_per_word - __n, 0u, _FillVal); + } + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static __bit_iterator<_Cp, false> + operator()(__bit_iterator<_Cp, false> __first, _Size __n, const _Tp& __value) { + if (__n > 0) { + if (__value) + __impl(__first, __n); + else + __impl(__first, __n); + } + return __first + __n; + } +}; + +template +struct __specialized_algorithm<_Algorithm::__copy, + __iterator_pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, _IsConst> >, + __single_iterator<__bit_iterator<_Cp, false> > > { + static const bool __has_algorithm = true; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static __bit_iterator<_Cp, false> + __aligned_impl(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __nw = __n / __bits_per_word; + std::copy(std::__to_address(__first.__seg_), + 
std::__to_address(__first.__seg_ + __nw), + std::__to_address(__result.__seg_)); + __n -= __nw * __bits_per_word; + __result.__seg_ += __nw; + // do last word + if (__n > 0) { + __first.__seg_ += __nw; + __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(__n); + } + } + return __result; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static __bit_iterator<_Cp, false> + __unaligned_impl(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_); + __storage_type __b = *__first.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); + __m = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); + else + *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__first.__ctz_ 
+ __ddn); + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_); + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { + __storage_type __b = *__first.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + ++__result.__seg_; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b >> __clz_r; + } + // do last word + if (__n > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + __storage_type __dn = std::min(__n, static_cast(__clz_r)); + __m = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __dn; + __result.__ctz_ = static_cast(__n); + } + } + } + return __result; + } + + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX20 static pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > + operator()(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + if (__first.__ctz_ == __result.__ctz_) + return std::make_pair(__last, __aligned_impl(__first, __last, __result)); + return std::make_pair(__last, __unaligned_impl(__first, __last, __result)); + } }; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__charconv/from_chars_integral.h b/lib/libcxx/include/__charconv/from_chars_integral.h index c1f033b37b..903e892cab 100644 --- 
a/lib/libcxx/include/__charconv/from_chars_integral.h +++ b/lib/libcxx/include/__charconv/from_chars_integral.h @@ -18,8 +18,8 @@ #include <__memory/addressof.h> #include <__system_error/errc.h> #include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> #include <__type_traits/is_integral.h> +#include <__type_traits/is_signed.h> #include <__type_traits/is_unsigned.h> #include <__type_traits/make_unsigned.h> #include diff --git a/lib/libcxx/include/__charconv/from_chars_result.h b/lib/libcxx/include/__charconv/from_chars_result.h index a7bfd6530a..b4ecea3d11 100644 --- a/lib/libcxx/include/__charconv/from_chars_result.h +++ b/lib/libcxx/include/__charconv/from_chars_result.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 -struct _LIBCPP_EXPORTED_FROM_ABI from_chars_result { +struct from_chars_result { const char* ptr; errc ec; # if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__charconv/to_chars_integral.h b/lib/libcxx/include/__charconv/to_chars_integral.h index f10cc35668..6d42513926 100644 --- a/lib/libcxx/include/__charconv/to_chars_integral.h +++ b/lib/libcxx/include/__charconv/to_chars_integral.h @@ -24,6 +24,7 @@ #include <__type_traits/integral_constant.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_signed.h> #include <__type_traits/make_32_64_or_128_bit.h> #include <__type_traits/make_unsigned.h> #include <__utility/unreachable.h> diff --git a/lib/libcxx/include/__charconv/to_chars_result.h b/lib/libcxx/include/__charconv/to_chars_result.h index 41dea4ab14..a3cd4e28d0 100644 --- a/lib/libcxx/include/__charconv/to_chars_result.h +++ b/lib/libcxx/include/__charconv/to_chars_result.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 -struct _LIBCPP_EXPORTED_FROM_ABI to_chars_result { +struct to_chars_result { char* ptr; errc ec; # if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__charconv/traits.h 
b/lib/libcxx/include/__charconv/traits.h index 9fd0092ca7..b8c840d1eb 100644 --- a/lib/libcxx/include/__charconv/traits.h +++ b/lib/libcxx/include/__charconv/traits.h @@ -113,31 +113,10 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool -__mul_overflowed(unsigned char __a, _Tp __b, unsigned char& __r) { - auto __c = __a * __b; - __r = __c; - return __c > numeric_limits::max(); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool -__mul_overflowed(unsigned short __a, _Tp __b, unsigned short& __r) { - auto __c = __a * __b; - __r = __c; - return __c > numeric_limits::max(); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool __mul_overflowed(_Tp __a, _Tp __b, _Tp& __r) { - static_assert(is_unsigned<_Tp>::value, ""); - return __builtin_mul_overflow(__a, __b, std::addressof(__r)); -} - template -inline _LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed(_Tp __a, _Up __b, _Tp& __r) { - return __itoa::__mul_overflowed(__a, static_cast<_Tp>(__b), __r); +_LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed(_Tp __a, _Up __b, _Tp& __r) { + static_assert(is_unsigned<_Tp>::value); + return __builtin_mul_overflow(__a, static_cast<_Tp>(__b), std::addressof(__r)); } template diff --git a/lib/libcxx/include/__chrono/day.h b/lib/libcxx/include/__chrono/day.h index f5b14689a7..46822c5991 100644 --- a/lib/libcxx/include/__chrono/day.h +++ b/lib/libcxx/include/__chrono/day.h @@ -13,6 +13,8 @@ #include <__chrono/duration.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -92,6 +94,15 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr day& day::operator-=(const days& __dd) no } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static 
size_t operator()(const chrono::day& __d) noexcept { return static_cast(__d); } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/duration.h b/lib/libcxx/include/__chrono/duration.h index 57fa64d650..9313fc797e 100644 --- a/lib/libcxx/include/__chrono/duration.h +++ b/lib/libcxx/include/__chrono/duration.h @@ -13,6 +13,8 @@ #include <__compare/ordering.h> #include <__compare/three_way_comparable.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_convertible.h> @@ -102,7 +104,8 @@ struct __duration_cast<_FromDuration, _ToDuration, _Period, false, false> { }; template , int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration duration_cast(const duration<_Rep, _Period>& __fd) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration +duration_cast(const duration<_Rep, _Period>& __fd) { return __duration_cast, _ToDuration>()(__fd); } @@ -117,14 +120,18 @@ inline constexpr bool treat_as_floating_point_v = treat_as_floating_point<_Rep>: template struct duration_values { public: - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep zero() _NOEXCEPT { return _Rep(0); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep max() _NOEXCEPT { return numeric_limits<_Rep>::max(); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep min() _NOEXCEPT { return numeric_limits<_Rep>::lowest(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep zero() _NOEXCEPT { return _Rep(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep max() _NOEXCEPT { + return numeric_limits<_Rep>::max(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep min() _NOEXCEPT { + return numeric_limits<_Rep>::lowest(); + } }; #if _LIBCPP_STD_VER >= 17 template , int> = 0> -inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration<_Rep, _Period>& __d) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration<_Rep, _Period>& __d) { _ToDuration __t = chrono::duration_cast<_ToDuration>(__d); if (__t > __d) __t = __t - _ToDuration{1}; @@ -132,7 +139,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration< } template , int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_Rep, _Period>& __d) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_Rep, _Period>& __d) { _ToDuration __t = chrono::duration_cast<_ToDuration>(__d); if (__t < __d) __t = __t + _ToDuration{1}; @@ -140,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_ } template , int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration<_Rep, _Period>& __d) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration<_Rep, _Period>& __d) { _ToDuration __lower = chrono::floor<_ToDuration>(__d); _ToDuration __upper = __lower + _ToDuration{1}; auto __lower_diff = __d - __lower; @@ -220,14 +227,14 @@ public: // observer - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR rep count() const { return __rep_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR rep count() const { return __rep_; } // arithmetic - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type::type operator+() const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type::type operator+() const { return typename common_type::type(*this); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type::type operator-() const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type::type operator-() const { return typename common_type::type(-__rep_); } _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX17 duration& operator++() { @@ -269,13 +276,13 @@ public: // special values - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration zero() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration zero() _NOEXCEPT { return duration(duration_values::zero()); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration min() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration min() _NOEXCEPT { return duration(duration_values::min()); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration max() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration max() _NOEXCEPT { return duration(duration_values::max()); } }; @@ -389,7 +396,7 @@ operator<=>(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Perio // Duration + template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type, duration<_Rep2, _Period2> >::type operator+(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) { typedef typename common_type, duration<_Rep2, _Period2> >::type _Cd; @@ -399,7 +406,7 @@ operator+(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2 // Duration - template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type, duration<_Rep2, _Period2> >::type operator-(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) { typedef typename common_type, duration<_Rep2, _Period2> >::type _Cd; @@ -412,7 +419,8 @@ template ::type>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration::type, _Period> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR duration::type, _Period> operator*(const duration<_Rep1, _Period>& __d, const _Rep2& __s) { typedef typename common_type<_Rep1, _Rep2>::type _Cr; 
typedef duration<_Cr, _Period> _Cd; @@ -423,7 +431,8 @@ template ::type>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration::type, _Period> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR duration::type, _Period> operator*(const _Rep1& __s, const duration<_Rep2, _Period>& __d) { return __d * __s; } @@ -436,7 +445,8 @@ template && is_convertible::type>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration::type, _Period> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR duration::type, _Period> operator/(const duration<_Rep1, _Period>& __d, const _Rep2& __s) { typedef typename common_type<_Rep1, _Rep2>::type _Cr; typedef duration<_Cr, _Period> _Cd; @@ -444,7 +454,7 @@ operator/(const duration<_Rep1, _Period>& __d, const _Rep2& __s) { } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<_Rep1, _Rep2>::type +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<_Rep1, _Rep2>::type operator/(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) { typedef typename common_type, duration<_Rep2, _Period2> >::type _Ct; return _Ct(__lhs).count() / _Ct(__rhs).count(); @@ -458,7 +468,8 @@ template && is_convertible::type>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration::type, _Period> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR duration::type, _Period> operator%(const duration<_Rep1, _Period>& __d, const _Rep2& __s) { typedef typename common_type<_Rep1, _Rep2>::type _Cr; typedef duration<_Cr, _Period> _Cd; @@ -466,7 +477,7 @@ operator%(const duration<_Rep1, _Period>& __d, const _Rep2& __s) { } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type, duration<_Rep2, _Period2> >::type operator%(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) { 
typedef typename common_type<_Rep1, _Rep2>::type _Cr; @@ -481,51 +492,53 @@ operator%(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2 inline namespace literals { inline namespace chrono_literals { -_LIBCPP_HIDE_FROM_ABI constexpr chrono::hours operator""h(unsigned long long __h) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::hours operator""h(unsigned long long __h) { return chrono::hours(static_cast(__h)); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration> operator""h(long double __h) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration> +operator""h(long double __h) { return chrono::duration>(__h); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::minutes operator""min(unsigned long long __m) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::minutes operator""min(unsigned long long __m) { return chrono::minutes(static_cast(__m)); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration> operator""min(long double __m) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration> +operator""min(long double __m) { return chrono::duration>(__m); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::seconds operator""s(unsigned long long __s) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::seconds operator""s(unsigned long long __s) { return chrono::seconds(static_cast(__s)); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""s(long double __s) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""s(long double __s) { return chrono::duration(__s); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::milliseconds operator""ms(unsigned long long __ms) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::milliseconds operator""ms(unsigned long long __ms) { return chrono::milliseconds(static_cast(__ms)); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""ms(long double __ms) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""ms(long 
double __ms) { return chrono::duration(__ms); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::microseconds operator""us(unsigned long long __us) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::microseconds operator""us(unsigned long long __us) { return chrono::microseconds(static_cast(__us)); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""us(long double __us) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""us(long double __us) { return chrono::duration(__us); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::nanoseconds operator""ns(unsigned long long __ns) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::nanoseconds operator""ns(unsigned long long __ns) { return chrono::nanoseconds(static_cast(__ns)); } -_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""ns(long double __ns) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration operator""ns(long double __ns) { return chrono::duration(__ns); } @@ -538,6 +551,18 @@ using namespace literals::chrono_literals; #endif // _LIBCPP_STD_VER >= 14 +#if _LIBCPP_STD_VER >= 26 + +template + requires __has_enabled_hash<_Rep>::value +struct hash> { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::duration<_Rep, _Period>& __d) { + return hash<_Rep>{}(__d.count()); + } +}; + +#endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__chrono/file_clock.h b/lib/libcxx/include/__chrono/file_clock.h index b4b7e9dc14..968f652f79 100644 --- a/lib/libcxx/include/__chrono/file_clock.h +++ b/lib/libcxx/include/__chrono/file_clock.h @@ -60,16 +60,18 @@ struct _FilesystemClock { _LIBCPP_EXPORTED_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX14 const bool is_steady = false; - _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_EXPORTED_FROM_ABI static time_point now() noexcept; + [[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI static time_point now() noexcept; # if _LIBCPP_STD_VER >= 20 
template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static chrono::sys_time<_Duration> to_sys(const chrono::file_time<_Duration>& __t) { return chrono::sys_time<_Duration>(__t.time_since_epoch()); } template - _LIBCPP_HIDE_FROM_ABI static chrono::file_time<_Duration> from_sys(const chrono::sys_time<_Duration>& __t) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static chrono::file_time<_Duration> + from_sys(const chrono::sys_time<_Duration>& __t) { return chrono::file_time<_Duration>(__t.time_since_epoch()); } # endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/is_clock.h b/lib/libcxx/include/__chrono/is_clock.h new file mode 100644 index 0000000000..e63b8485d0 --- /dev/null +++ b/lib/libcxx/include/__chrono/is_clock.h @@ -0,0 +1,72 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CHRONO_IS_CLOCK_H +#define _LIBCPP___CHRONO_IS_CLOCK_H + +#include <__config> + +#include <__chrono/duration.h> +#include <__chrono/time_point.h> +#include <__concepts/same_as.h> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_arithmetic.h> +#include <__type_traits/is_class.h> +#include <__type_traits/is_union.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER >= 20 + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace chrono { + +// Helper to check that _Tp::time_point has the form time_point<_, typename _Tp::duration>. 
+template +inline constexpr bool __is_valid_clock_time_point_v = false; + +template +inline constexpr bool + __is_valid_clock_time_point_v, _ClockType> = true; + +// Check if a clock satisfies the Cpp17Clock requirements as defined in [time.clock.req] +template +_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_clock_v = requires { + typename _Tp::rep; + requires is_arithmetic_v || is_class_v || is_union_v; + + typename _Tp::period; + requires __is_ratio_v; + + typename _Tp::duration; + requires same_as>; + + typename _Tp::time_point; + requires __is_valid_clock_time_point_v; + + _Tp::is_steady; + requires same_as; + + _Tp::now(); + requires same_as; +}; + +template +struct _LIBCPP_NO_SPECIALIZATIONS is_clock : bool_constant> {}; + +} // namespace chrono + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER +#endif // _LIBCPP___CHRONO_IS_CLOCK_H diff --git a/lib/libcxx/include/__chrono/leap_second.h b/lib/libcxx/include/__chrono/leap_second.h index 1857bef803..9e9df6b595 100644 --- a/lib/libcxx/include/__chrono/leap_second.h +++ b/lib/libcxx/include/__chrono/leap_second.h @@ -22,6 +22,8 @@ # include <__compare/ordering.h> # include <__compare/three_way_comparable.h> # include <__config> +# include <__cstddef/size_t.h> +# include <__functional/hash.h> # include <__utility/private_constructor_tag.h> # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -122,6 +124,17 @@ private: } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::leap_second& __lp) noexcept { + return std::__hash_combine(hash{}(__lp.date()), hash{}(__lp.value())); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + # endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__chrono/month.h b/lib/libcxx/include/__chrono/month.h index 77c67d0954..669ac66360 100644 --- a/lib/libcxx/include/__chrono/month.h +++ b/lib/libcxx/include/__chrono/month.h @@ -13,6 +13,8 @@ 
#include <__chrono/duration.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -108,6 +110,17 @@ inline constexpr month December{12}; } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month& __m) noexcept { + return static_cast(__m); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/month_weekday.h b/lib/libcxx/include/__chrono/month_weekday.h index 7919879655..edb7d38606 100644 --- a/lib/libcxx/include/__chrono/month_weekday.h +++ b/lib/libcxx/include/__chrono/month_weekday.h @@ -13,6 +13,8 @@ #include <__chrono/month.h> #include <__chrono/weekday.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -98,6 +100,26 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr month_weekday_last operator/(const weekda } } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_weekday& __mw) noexcept { + return std::__hash_combine( + hash{}(__mw.month()), hash{}(__mw.weekday_indexed())); + } +}; + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_weekday_last& __mwl) noexcept { + return std::__hash_combine( + hash{}(__mwl.month()), hash{}(__mwl.weekday_last())); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/monthday.h b/lib/libcxx/include/__chrono/monthday.h index 57712cf0b6..2a7262be09 100644 --- a/lib/libcxx/include/__chrono/monthday.h +++ b/lib/libcxx/include/__chrono/monthday.h @@ -15,6 +15,8 @@ #include 
<__chrono/month.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -126,6 +128,24 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr month_day_last operator/(last_spec, int _ } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_day& __md) noexcept { + return std::__hash_combine(hash{}(__md.month()), hash{}(__md.day())); + } +}; + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_day_last& __mdl) noexcept { + return hash{}(__mdl.month()); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/steady_clock.h b/lib/libcxx/include/__chrono/steady_clock.h index 1b247b2c28..8e68c9a3c2 100644 --- a/lib/libcxx/include/__chrono/steady_clock.h +++ b/lib/libcxx/include/__chrono/steady_clock.h @@ -31,7 +31,7 @@ public: typedef chrono::time_point time_point; static _LIBCPP_CONSTEXPR_SINCE_CXX14 const bool is_steady = true; - static time_point now() _NOEXCEPT; + [[__nodiscard__]] static time_point now() _NOEXCEPT; }; #endif diff --git a/lib/libcxx/include/__chrono/system_clock.h b/lib/libcxx/include/__chrono/system_clock.h index 5a9eb65bda..e3ef75ae50 100644 --- a/lib/libcxx/include/__chrono/system_clock.h +++ b/lib/libcxx/include/__chrono/system_clock.h @@ -31,9 +31,9 @@ public: typedef chrono::time_point time_point; static _LIBCPP_CONSTEXPR_SINCE_CXX14 const bool is_steady = false; - static time_point now() _NOEXCEPT; - static time_t to_time_t(const time_point& __t) _NOEXCEPT; - static time_point from_time_t(time_t __t) _NOEXCEPT; + [[__nodiscard__]] static time_point now() _NOEXCEPT; + [[__nodiscard__]] static time_t to_time_t(const time_point& __t) _NOEXCEPT; + [[__nodiscard__]] static time_point 
from_time_t(time_t __t) _NOEXCEPT; }; #if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/time_point.h b/lib/libcxx/include/__chrono/time_point.h index fc4408d23d..d393e34cdb 100644 --- a/lib/libcxx/include/__chrono/time_point.h +++ b/lib/libcxx/include/__chrono/time_point.h @@ -14,6 +14,8 @@ #include <__compare/ordering.h> #include <__compare/three_way_comparable.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_convertible.h> @@ -54,7 +56,9 @@ public: // observer - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 duration time_since_epoch() const { return __d_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 duration time_since_epoch() const { + return __d_; + } // arithmetic @@ -82,8 +86,12 @@ public: // special values - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point min() _NOEXCEPT { return time_point(duration::min()); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point max() _NOEXCEPT { return time_point(duration::max()); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point min() _NOEXCEPT { + return time_point(duration::min()); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point max() _NOEXCEPT { + return time_point(duration::max()); + } }; } // namespace chrono @@ -95,30 +103,33 @@ struct common_type, chrono::time_point<_C namespace chrono { -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, _ToDuration> +template , int> = 0> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, _ToDuration> time_point_cast(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>(chrono::duration_cast<_ToDuration>(__t.time_since_epoch())); } #if _LIBCPP_STD_VER >= 17 template , int> = 0> -inline 
_LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> floor(const time_point<_Clock, _Duration>& __t) { +[[nodiscard]] inline + _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> floor(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>{chrono::floor<_ToDuration>(__t.time_since_epoch())}; } template , int> = 0> -inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> ceil(const time_point<_Clock, _Duration>& __t) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> +ceil(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>{chrono::ceil<_ToDuration>(__t.time_since_epoch())}; } template , int> = 0> -inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> round(const time_point<_Clock, _Duration>& __t) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> +round(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>{chrono::round<_ToDuration>(__t.time_since_epoch())}; } template ::is_signed, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI constexpr duration<_Rep, _Period> abs(duration<_Rep, _Period> __d) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr duration<_Rep, _Period> abs(duration<_Rep, _Period> __d) { return __d >= __d.zero() ? 
+__d : -__d; } #endif // _LIBCPP_STD_VER >= 17 @@ -188,7 +199,7 @@ operator<=>(const time_point<_Clock, _Duration1>& __lhs, const time_point<_Clock // time_point operator+(time_point x, duration y); template -inline _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type> operator+(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Period2>& __rhs) { typedef time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type> _Tr; @@ -198,7 +209,7 @@ operator+(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Pe // time_point operator+(duration x, time_point y); template -inline _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, typename common_type, _Duration2>::type> operator+(const duration<_Rep1, _Period1>& __lhs, const time_point<_Clock, _Duration2>& __rhs) { return __rhs + __lhs; @@ -207,7 +218,7 @@ operator+(const duration<_Rep1, _Period1>& __lhs, const time_point<_Clock, _Dura // time_point operator-(time_point x, duration y); template -inline _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type> operator-(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Period2>& __rhs) { typedef time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type> _Ret; @@ -217,13 +228,26 @@ operator-(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Pe // duration operator-(time_point x, time_point y); template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename common_type<_Duration1, _Duration2>::type +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 +typename common_type<_Duration1, 
_Duration2>::type operator-(const time_point<_Clock, _Duration1>& __lhs, const time_point<_Clock, _Duration2>& __rhs) { return __lhs.time_since_epoch() - __rhs.time_since_epoch(); } } // namespace chrono +#if _LIBCPP_STD_VER >= 26 + +template + requires __has_enabled_hash<_Duration>::value +struct hash> { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::time_point<_Clock, _Duration>& __tp) { + return hash<_Duration>{}(__tp.time_since_epoch()); + } +}; + +#endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__chrono/weekday.h b/lib/libcxx/include/__chrono/weekday.h index 728cbb8446..143803495d 100644 --- a/lib/libcxx/include/__chrono/weekday.h +++ b/lib/libcxx/include/__chrono/weekday.h @@ -15,6 +15,8 @@ #include <__chrono/system_clock.h> #include <__chrono/time_point.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -160,6 +162,29 @@ inline constexpr weekday Saturday{6}; } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::weekday& __w) noexcept { return __w.c_encoding(); } +}; + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::weekday_indexed& __wi) noexcept { + return std::__hash_combine(hash{}(__wi.weekday()), __wi.index()); + } +}; + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::weekday_last& __wl) noexcept { + return hash{}(__wl.weekday()); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/year.h b/lib/libcxx/include/__chrono/year.h index 2ae5180cb8..aaef38acd9 100644 --- a/lib/libcxx/include/__chrono/year.h +++ b/lib/libcxx/include/__chrono/year.h @@ -13,6 +13,8 @@ #include 
<__chrono/duration.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -109,6 +111,15 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool year::ok() const noexcept { } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year& __y) noexcept { return static_cast(__y); } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/year_month.h b/lib/libcxx/include/__chrono/year_month.h index cf9234bdb4..e36091c021 100644 --- a/lib/libcxx/include/__chrono/year_month.h +++ b/lib/libcxx/include/__chrono/year_month.h @@ -15,6 +15,8 @@ #include <__chrono/year.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -116,6 +118,17 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr year_month& year_month::operator-=(const } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month& __ym) noexcept { + return std::__hash_combine(hash{}(__ym.year()), hash{}(__ym.month())); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/year_month_day.h b/lib/libcxx/include/__chrono/year_month_day.h index a0510a14f4..0a2aaedd60 100644 --- a/lib/libcxx/include/__chrono/year_month_day.h +++ b/lib/libcxx/include/__chrono/year_month_day.h @@ -21,6 +21,8 @@ #include <__chrono/year_month.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -330,6 +332,27 @@ _LIBCPP_HIDE_FROM_ABI 
inline constexpr bool year_month_day::ok() const noexcept } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_day& __ymd) noexcept { + return std::__hash_combine( + hash{}(__ymd.year()), + std::__hash_combine(hash{}(__ymd.month()), hash{}(__ymd.day()))); + } +}; + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_day_last& __ymdl) noexcept { + return std::__hash_combine( + hash{}(__ymdl.year()), hash{}(__ymdl.month_day_last())); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__chrono/year_month_weekday.h b/lib/libcxx/include/__chrono/year_month_weekday.h index 0c3dd494c8..6ed1e21fe9 100644 --- a/lib/libcxx/include/__chrono/year_month_weekday.h +++ b/lib/libcxx/include/__chrono/year_month_weekday.h @@ -22,6 +22,8 @@ #include <__chrono/year_month.h> #include <__chrono/year_month_day.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__functional/hash.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -280,6 +282,30 @@ year_month_weekday_last::operator-=(const years& __dy) noexcept { } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_weekday& __ymw) noexcept { + return std::__hash_combine( + hash{}(__ymw.year()), + std::__hash_combine( + hash{}(__ymw.month()), hash{}(__ymw.weekday_indexed()))); + } +}; + +template <> +struct hash { + _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_weekday_last& __ymwl) noexcept { + return std::__hash_combine( + hash{}(__ymwl.year()), + std::__hash_combine( + hash{}(__ymwl.month()), hash{}(__ymwl.weekday_last()))); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 20 diff --git 
a/lib/libcxx/include/__chrono/zoned_time.h b/lib/libcxx/include/__chrono/zoned_time.h index 8db687a422..e63528e57d 100644 --- a/lib/libcxx/include/__chrono/zoned_time.h +++ b/lib/libcxx/include/__chrono/zoned_time.h @@ -24,6 +24,8 @@ # include <__chrono/tzdb_list.h> # include <__concepts/constructible.h> # include <__config> +# include <__cstddef/size_t.h> +# include <__functional/hash.h> # include <__type_traits/common_type.h> # include <__type_traits/conditional.h> # include <__type_traits/remove_cvref.h> @@ -216,6 +218,20 @@ operator==(const zoned_time<_Duration1, _TimeZonePtr>& __lhs, const zoned_time<_ } // namespace chrono +# if _LIBCPP_STD_VER >= 26 + +template + requires __has_enabled_hash<_Duration>::value && __has_enabled_hash<_TimeZonePtr>::value +struct hash> { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t + operator()(const chrono::zoned_time<_Duration, _TimeZonePtr>& __zt) { + return std::__hash_combine( + hash>{}(__zt.get_sys_time()), hash<_TimeZonePtr>{}(__zt.get_time_zone())); + } +}; + +# endif // _LIBCPP_STD_VER >= 26 + # endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && // _LIBCPP_HAS_LOCALIZATION diff --git a/lib/libcxx/include/__compare/is_eq.h b/lib/libcxx/include/__compare/is_eq.h index 9a82df1ebe..ee4d11bc7c 100644 --- a/lib/libcxx/include/__compare/is_eq.h +++ b/lib/libcxx/include/__compare/is_eq.h @@ -20,12 +20,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 -_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_eq(partial_ordering __c) noexcept { return __c == 0; } -_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_neq(partial_ordering __c) noexcept { return __c != 0; } -_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lt(partial_ordering __c) noexcept { return __c < 0; } -_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lteq(partial_ordering __c) noexcept { return __c <= 0; } -_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gt(partial_ordering __c) noexcept { return __c > 0; } 
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gteq(partial_ordering __c) noexcept { return __c >= 0; } +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_eq(partial_ordering __c) noexcept { return __c == 0; } +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_neq(partial_ordering __c) noexcept { return __c != 0; } +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lt(partial_ordering __c) noexcept { return __c < 0; } +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lteq(partial_ordering __c) noexcept { return __c <= 0; } +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gt(partial_ordering __c) noexcept { return __c > 0; } +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gteq(partial_ordering __c) noexcept { return __c >= 0; } #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__compare/strong_order.h b/lib/libcxx/include/__compare/strong_order.h index 8c363b5638..ba6de44643 100644 --- a/lib/libcxx/include/__compare/strong_order.h +++ b/lib/libcxx/include/__compare/strong_order.h @@ -13,7 +13,6 @@ #include <__compare/compare_three_way.h> #include <__compare/ordering.h> #include <__config> -#include <__math/exponential_functions.h> #include <__math/traits.h> #include <__type_traits/conditional.h> #include <__type_traits/decay.h> @@ -53,38 +52,21 @@ struct __fn { template > requires is_same_v<_Dp, decay_t<_Up>> && is_floating_point_v<_Dp> _LIBCPP_HIDE_FROM_ABI static constexpr strong_ordering __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) noexcept { - if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int32_t)) { - int32_t __rx = std::bit_cast(__t); - int32_t __ry = std::bit_cast(__u); - __rx = (__rx < 0) ? (numeric_limits::min() - __rx - 1) : __rx; - __ry = (__ry < 0) ? 
(numeric_limits::min() - __ry - 1) : __ry; - return (__rx <=> __ry); - } else if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int64_t)) { - int64_t __rx = std::bit_cast(__t); - int64_t __ry = std::bit_cast(__u); - __rx = (__rx < 0) ? (numeric_limits::min() - __rx - 1) : __rx; - __ry = (__ry < 0) ? (numeric_limits::min() - __ry - 1) : __ry; + if constexpr (numeric_limits<_Dp>::is_iec559 && + (sizeof(_Dp) == sizeof(int32_t) || sizeof(_Dp) == sizeof(int64_t))) { + using _IntT = conditional_t; + _IntT __rx = std::bit_cast<_IntT>(__t); + _IntT __ry = std::bit_cast<_IntT>(__u); + __rx = (__rx < 0) ? (numeric_limits<_IntT>::min() - __rx - 1) : __rx; + __ry = (__ry < 0) ? (numeric_limits<_IntT>::min() - __ry - 1) : __ry; return (__rx <=> __ry); } else if (__t < __u) { return strong_ordering::less; } else if (__t > __u) { return strong_ordering::greater; } else if (__t == __u) { - if constexpr (numeric_limits<_Dp>::radix == 2) { - return __math::signbit(__u) <=> __math::signbit(__t); - } else { - // This is bullet 3 of the IEEE754 algorithm, relevant - // only for decimal floating-point; - // see https://stackoverflow.com/questions/69068075/ - if (__t == 0 || __math::isinf(__t)) { - return __math::signbit(__u) <=> __math::signbit(__t); - } else { - int __texp, __uexp; - (void)__math::frexp(__t, &__texp); - (void)__math::frexp(__u, &__uexp); - return (__t < 0) ? (__texp <=> __uexp) : (__uexp <=> __texp); - } - } + static_assert(numeric_limits<_Dp>::radix == 2, "floating point type with a radix other than 2?"); + return __math::signbit(__u) <=> __math::signbit(__t); } else { // They're unordered, so one of them must be a NAN. // The order is -QNAN, -SNAN, numbers, +SNAN, +QNAN. 
@@ -93,9 +75,9 @@ struct __fn { bool __t_is_negative = __math::signbit(__t); bool __u_is_negative = __math::signbit(__u); using _IntType = - conditional_t< sizeof(__t) == sizeof(int32_t), - int32_t, - conditional_t< sizeof(__t) == sizeof(int64_t), int64_t, void> >; + conditional_t>; if constexpr (is_same_v<_IntType, void>) { static_assert(sizeof(_Dp) == 0, "std::strong_order is unimplemented for this floating-point type"); } else if (__t_is_nan && __u_is_nan) { diff --git a/lib/libcxx/include/__compare/three_way_comparable.h b/lib/libcxx/include/__compare/three_way_comparable.h index 7a44ea9158..ad6d05a681 100644 --- a/lib/libcxx/include/__compare/three_way_comparable.h +++ b/lib/libcxx/include/__compare/three_way_comparable.h @@ -12,6 +12,7 @@ #include <__compare/common_comparison_category.h> #include <__compare/ordering.h> #include <__concepts/common_reference_with.h> +#include <__concepts/comparison_common_type.h> #include <__concepts/equality_comparable.h> #include <__concepts/same_as.h> #include <__concepts/totally_ordered.h> @@ -39,8 +40,7 @@ concept three_way_comparable = template concept three_way_comparable_with = - three_way_comparable<_Tp, _Cat> && three_way_comparable<_Up, _Cat> && - common_reference_with<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>> && + three_way_comparable<_Tp, _Cat> && three_way_comparable<_Up, _Cat> && __comparison_common_type_with<_Tp, _Up> && three_way_comparable, __make_const_lvalue_ref<_Up>>, _Cat> && __weakly_equality_comparable_with<_Tp, _Up> && __partially_ordered_with<_Tp, _Up> && requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) { diff --git a/lib/libcxx/include/__concepts/comparison_common_type.h b/lib/libcxx/include/__concepts/comparison_common_type.h new file mode 100644 index 0000000000..3f0d770511 --- /dev/null +++ b/lib/libcxx/include/__concepts/comparison_common_type.h @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CONCEPTS_COMPARISON_COMMON_TYPE_H +#define _LIBCPP___CONCEPTS_COMPARISON_COMMON_TYPE_H + +#include <__concepts/convertible_to.h> +#include <__concepts/same_as.h> +#include <__config> +#include <__type_traits/common_reference.h> +#include <__type_traits/remove_cvref.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +template > +concept __comparison_common_type_with_impl = + same_as, common_reference_t> && requires { + requires convertible_to || convertible_to<_Tp, const _CommonRef&>; + requires convertible_to || convertible_to<_Up, const _CommonRef&>; + }; + +template +concept __comparison_common_type_with = __comparison_common_type_with_impl, remove_cvref_t<_Up>>; + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___CONCEPTS_COMPARISON_COMMON_TYPE_H diff --git a/lib/libcxx/include/__concepts/equality_comparable.h b/lib/libcxx/include/__concepts/equality_comparable.h index 278fc76409..56fc6f8b68 100644 --- a/lib/libcxx/include/__concepts/equality_comparable.h +++ b/lib/libcxx/include/__concepts/equality_comparable.h @@ -11,6 +11,7 @@ #include <__concepts/boolean_testable.h> #include <__concepts/common_reference_with.h> +#include <__concepts/comparison_common_type.h> #include <__config> #include <__type_traits/common_reference.h> #include <__type_traits/make_const_lvalue_ref.h> @@ -41,7 +42,7 @@ concept equality_comparable = __weakly_equality_comparable_with<_Tp, _Tp>; template concept equality_comparable_with = equality_comparable<_Tp> && equality_comparable<_Up> && - common_reference_with<__make_const_lvalue_ref<_Tp>, 
__make_const_lvalue_ref<_Up>> && + __comparison_common_type_with<_Tp, _Up> && equality_comparable< common_reference_t< __make_const_lvalue_ref<_Tp>, diff --git a/lib/libcxx/include/__condition_variable/condition_variable.h b/lib/libcxx/include/__condition_variable/condition_variable.h index 1e8edd5dcb..b7151930e9 100644 --- a/lib/libcxx/include/__condition_variable/condition_variable.h +++ b/lib/libcxx/include/__condition_variable/condition_variable.h @@ -170,7 +170,7 @@ public: wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred); typedef __libcpp_condvar_t* native_handle_type; - _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; } private: void diff --git a/lib/libcxx/include/__config b/lib/libcxx/include/__config index 03538ff78e..de32af8f76 100644 --- a/lib/libcxx/include/__config +++ b/lib/libcxx/include/__config @@ -14,6 +14,8 @@ #include <__configuration/abi.h> #include <__configuration/availability.h> #include <__configuration/compiler.h> +#include <__configuration/experimental.h> +#include <__configuration/hardening.h> #include <__configuration/language.h> #include <__configuration/platform.h> @@ -28,7 +30,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. 
-# define _LIBCPP_VERSION 210100 +# define _LIBCPP_VERSION 220104 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) @@ -38,195 +40,6 @@ # define _LIBCPP_FREESTANDING # endif -// NOLINTNEXTLINE(libcpp-cpp-version-check) -# if __cplusplus < 201103L -# define _LIBCPP_CXX03_LANG -# endif - -# if __has_feature(experimental_library) -# ifndef _LIBCPP_ENABLE_EXPERIMENTAL -# define _LIBCPP_ENABLE_EXPERIMENTAL -# endif -# endif - -// Incomplete features get their own specific disabling flags. This makes it -// easier to grep for target specific flags once the feature is complete. -# if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1 -# else -# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0 -# endif - -# define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY -# define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY -# define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY -# define _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC _LIBCPP_HAS_EXPERIMENTAL_LIBRARY - -// HARDENING { - -// TODO(LLVM 23): Remove this. We're making these an error to catch folks who might not have migrated. -// Since hardening went through several changes (many of which impacted user-facing macros), -// we're keeping these checks around for a bit longer than usual. Failure to properly configure -// hardening results in checks being dropped silently, which is a pretty big deal. 
-# if defined(_LIBCPP_ENABLE_ASSERTIONS) -# error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" -# endif -# if defined(_LIBCPP_ENABLE_HARDENED_MODE) -# error "_LIBCPP_ENABLE_HARDENED_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" -# endif -# if defined(_LIBCPP_ENABLE_SAFE_MODE) -# error "_LIBCPP_ENABLE_SAFE_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" -# endif -# if defined(_LIBCPP_ENABLE_DEBUG_MODE) -# error "_LIBCPP_ENABLE_DEBUG_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" -# endif - -// The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: -// -// - `_LIBCPP_HARDENING_MODE_NONE`; -// - `_LIBCPP_HARDENING_MODE_FAST`; -// - `_LIBCPP_HARDENING_MODE_EXTENSIVE`; -// - `_LIBCPP_HARDENING_MODE_DEBUG`. -// -// These values have the following effects: -// -// - `_LIBCPP_HARDENING_MODE_NONE` -- sets the hardening mode to "none" which disables all runtime hardening checks; -// -// - `_LIBCPP_HARDENING_MODE_FAST` -- sets that hardening mode to "fast". The fast mode enables security-critical checks -// that can be done with relatively little runtime overhead in constant time; -// -// - `_LIBCPP_HARDENING_MODE_EXTENSIVE` -- sets the hardening mode to "extensive". The extensive mode is a superset of -// the fast mode that additionally enables checks that are relatively cheap and prevent common types of logic errors -// but are not necessarily security-critical; -// -// - `_LIBCPP_HARDENING_MODE_DEBUG` -- sets the hardening mode to "debug". The debug mode is a superset of the extensive -// mode and enables all checks available in the library, including internal assertions. Checks that are part of the -// debug mode can be very expensive and thus the debug mode is intended to be used for testing, not in production. 
- -// Inside the library, assertions are categorized so they can be cherry-picked based on the chosen hardening mode. These -// macros are only for internal use -- users should only pick one of the high-level hardening modes described above. -// -// - `_LIBCPP_ASSERT_VALID_INPUT_RANGE` -- checks that ranges (whether expressed as an iterator pair, an iterator and -// a sentinel, an iterator and a count, or a `std::range`) given as input to library functions are valid: -// - the sentinel is reachable from the begin iterator; -// - TODO(hardening): both iterators refer to the same container. -// -// - `_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS` -- checks that any attempts to access a container element, whether through -// the container object or through an iterator, are valid and do not attempt to go out of bounds or otherwise access -// a non-existent element. For iterator checks to work, bounded iterators must be enabled in the ABI. Types like -// `optional` and `function` are considered one-element containers for the purposes of this check. -// -// - `_LIBCPP_ASSERT_NON_NULL` -- checks that the pointer being dereferenced is not null. On most modern platforms zero -// address does not refer to an actual location in memory, so a null pointer dereference would not compromize the -// memory security of a program (however, it is still undefined behavior that can result in strange errors due to -// compiler optimizations). -// -// - `_LIBCPP_ASSERT_NON_OVERLAPPING_RANGES` -- for functions that take several ranges as arguments, checks that the -// given ranges do not overlap. -// -// - `_LIBCPP_ASSERT_VALID_DEALLOCATION` -- checks that an attempt to deallocate memory is valid (e.g. the given object -// was allocated by the given allocator). Violating this category typically results in a memory leak. -// -// - `_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL` -- checks that a call to an external API doesn't fail in -// an unexpected manner. 
This includes triggering documented cases of undefined behavior in an external library (like -// attempting to unlock an unlocked mutex in pthreads). Any API external to the library falls under this category -// (from system calls to compiler intrinsics). We generally don't expect these failures to compromize memory safety or -// otherwise create an immediate security issue. -// -// - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure -// the containers have compatible allocators. -// -// - `_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN` -- checks that the given argument is within the domain of valid arguments -// for the function. Violating this typically produces an incorrect result (e.g. the clamp algorithm returns the -// original value without clamping it due to incorrect functors) or puts an object into an invalid state (e.g. -// a string view where only a subset of elements is possible to access). This category is for assertions violating -// which doesn't cause any immediate issues in the library -- whatever the consequences are, they will happen in the -// user code. -// -// - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to -// be benign in our implementation. -// -// - `_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT` -- checks that the given argument satisfies the semantic requirements imposed -// by the Standard. Typically, there is no simple way to completely prove that a semantic requirement is satisfied; -// thus, this would often be a heuristic check and it might be quite expensive. -// -// - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on -// user input. -// -// - `_LIBCPP_ASSERT_UNCATEGORIZED` -- for assertions that haven't been properly classified yet. 
- -// clang-format off -# define _LIBCPP_HARDENING_MODE_NONE (1 << 1) -# define _LIBCPP_HARDENING_MODE_FAST (1 << 2) -# define _LIBCPP_HARDENING_MODE_EXTENSIVE (1 << 4) // Deliberately not ordered. -# define _LIBCPP_HARDENING_MODE_DEBUG (1 << 3) -// clang-format on - -# ifndef _LIBCPP_HARDENING_MODE - -# ifndef _LIBCPP_HARDENING_MODE_DEFAULT -# error _LIBCPP_HARDENING_MODE_DEFAULT is not defined. This definition should be set at configuration time in the \ -`__config_site` header, please make sure your installation of libc++ is not broken. -# endif - -# define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_DEFAULT -# endif - -# if _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_NONE && \ - _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_FAST && \ - _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_EXTENSIVE && \ - _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_DEBUG -# error _LIBCPP_HARDENING_MODE must be set to one of the following values: \ -_LIBCPP_HARDENING_MODE_NONE, \ -_LIBCPP_HARDENING_MODE_FAST, \ -_LIBCPP_HARDENING_MODE_EXTENSIVE, \ -_LIBCPP_HARDENING_MODE_DEBUG -# endif - -// Hardening assertion semantics generally mirror the evaluation semantics of C++26 Contracts: -// - `ignore` evaluates the assertion but doesn't do anything if it fails (note that it differs from the Contracts -// `ignore` semantic which wouldn't evaluate the assertion at all); -// - `observe` logs an error (indicating, if possible, that the error is fatal) and continues execution; -// - `quick-enforce` terminates the program as fast as possible (via trapping); -// - `enforce` logs an error and then terminates the program. 
-// -// Notes: -// - Continuing execution after a hardening check fails results in undefined behavior; the `observe` semantic is meant -// to make adopting hardening easier but should not be used outside of this scenario; -// - C++26 wording for Library Hardening precludes a conforming Hardened implementation from using the Contracts -// `ignore` semantic when evaluating hardened preconditions in the Library. Libc++ allows using this semantic for -// hardened preconditions, however, be aware that using `ignore` does not produce a conforming "Hardened" -// implementation, unlike the other semantics above. -// clang-format off -# define _LIBCPP_ASSERTION_SEMANTIC_IGNORE (1 << 1) -# define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE (1 << 2) -# define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (1 << 3) -# define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE (1 << 4) -// clang-format on - -// Allow users to define an arbitrary assertion semantic; otherwise, use the default mapping from modes to semantics. -// The default is for production-capable modes to use `quick-enforce` (i.e., trap) and for the `debug` mode to use -// `enforce` (i.e., log and abort). -# ifndef _LIBCPP_ASSERTION_SEMANTIC - -# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG -# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE -# else -# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE -# endif - -# else -# if !_LIBCPP_HAS_EXPERIMENTAL_LIBRARY -# error "Assertion semantics are an experimental feature." -# endif -# if defined(_LIBCPP_CXX03_LANG) -# error "Assertion semantics are not available in the C++03 mode." -# endif - -# endif // _LIBCPP_ASSERTION_SEMANTIC - -// } HARDENING - # define _LIBCPP_TOSTRING2(x) #x # define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x) @@ -320,13 +133,6 @@ _LIBCPP_HARDENING_MODE_DEBUG // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. 
// -// _LIBCPP_USING_NACL_RANDOM -// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, -// including accesses to the special files under `/dev`. This implementation -// uses the NaCL syscall `nacl_secure_random_init()` to get entropy. -// When this option is used, the token passed to `std::random_device`'s -// constructor *must* be "/dev/urandom" -- anything else is an error. -// // _LIBCPP_USING_WIN32_RANDOM // Use rand_s(), for use on Windows. // When this option is used, the token passed to `std::random_device`'s @@ -338,8 +144,6 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_USING_GETENTROPY # elif defined(__Fuchsia__) # define _LIBCPP_USING_FUCHSIA_CPRNG -# elif defined(__native_client__) -# define _LIBCPP_USING_NACL_RANDOM # elif defined(_LIBCPP_WIN32API) # define _LIBCPP_USING_WIN32_RANDOM # else @@ -348,7 +152,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # ifndef _LIBCPP_CXX03_LANG -# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) +# define _LIBCPP_ALIGNOF(...) alignof(__VA_ARGS__) # define _ALIGNAS_TYPE(x) alignas(x) # define _ALIGNAS(x) alignas(x) # define _NOEXCEPT noexcept @@ -357,7 +161,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # else -# define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) +# define _LIBCPP_ALIGNOF(...) _Alignof(__VA_ARGS__) # define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x)))) # define _ALIGNAS(x) __attribute__((__aligned__(x))) # define nullptr __nullptr @@ -471,6 +275,12 @@ typedef __char32_t char32_t; # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) # endif +// Macros to enter and leave a state where deprecation warnings are suppressed. 
+# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \ + _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated") \ + _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations") +# define _LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_DIAGNOSTIC_POP + # if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST # define _LIBCPP_HARDENING_SIG f # elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE @@ -481,6 +291,16 @@ typedef __char32_t char32_t; # define _LIBCPP_HARDENING_SIG n // "none" # endif +# if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_OBSERVE +# define _LIBCPP_ASSERTION_SEMANTIC_SIG o +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE +# define _LIBCPP_ASSERTION_SEMANTIC_SIG q +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# define _LIBCPP_ASSERTION_SEMANTIC_SIG e +# else +# define _LIBCPP_ASSERTION_SEMANTIC_SIG i // `ignore` +# endif + # if !_LIBCPP_HAS_EXCEPTIONS # define _LIBCPP_EXCEPTIONS_SIG n # else @@ -488,7 +308,9 @@ typedef __char32_t char32_t; # endif # define _LIBCPP_ODR_SIGNATURE \ - _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_EXCEPTIONS_SIG), _LIBCPP_VERSION) + _LIBCPP_CONCAT( \ + _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_ASSERTION_SEMANTIC_SIG), _LIBCPP_EXCEPTIONS_SIG), \ + _LIBCPP_VERSION) // This macro marks a symbol as being hidden from libc++'s ABI. This is achieved // on two levels: @@ -550,16 +372,6 @@ typedef __char32_t char32_t; # endif # define _LIBCPP_HIDE_FROM_ABI_VIRTUAL _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION -# ifdef _LIBCPP_BUILDING_LIBRARY -# if _LIBCPP_ABI_VERSION > 1 -# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI -# else -# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 -# endif -# else -# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI -# endif - // Clang modules take a significant compile time hit when pushing and popping diagnostics. 
// Since all the headers are marked as system headers unless _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER is defined, we can // simply disable this pushing and popping when _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER isn't defined. @@ -676,27 +488,6 @@ typedef __char32_t char32_t; # endif # endif -// It is not yet possible to use aligned_alloc() on all Apple platforms since -// 10.15 was the first version to ship an implementation of aligned_alloc(). -# if defined(__APPLE__) -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) -# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0 -# else -# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1 -# endif -# elif defined(__ANDROID__) && __ANDROID_API__ < 28 -// Android only provides aligned_alloc when targeting API 28 or higher. -# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0 -# else -# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1 -# endif - # if defined(__APPLE__) || defined(__FreeBSD__) # define _LIBCPP_WCTYPE_IS_MASK # endif @@ -727,6 +518,15 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif +// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be +// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that +// allows suppression in system headers. 
+# if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && 0 +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1 +# else +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0 +# endif + # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else @@ -771,17 +571,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_WITH_CHAR8_T # endif -// Macros to enter and leave a state where deprecation warnings are suppressed. -# if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC) -# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \ - _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") \ - _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -# define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop") -# else -# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH -# define _LIBCPP_SUPPRESS_DEPRECATED_POP -# endif - # if _LIBCPP_STD_VER <= 11 # define _LIBCPP_EXPLICIT_SINCE_CXX14 # else @@ -861,18 +650,10 @@ typedef __char32_t char32_t; # endif // _LIBCPP_HAS_THREAD_API # endif // _LIBCPP_HAS_THREADS -# if _LIBCPP_HAS_THREAD_API_PTHREAD -# if defined(__ANDROID__) && __ANDROID_API__ >= 30 -# define _LIBCPP_HAS_COND_CLOCKWAIT 1 -# elif defined(_LIBCPP_GLIBC_PREREQ) -# if _LIBCPP_GLIBC_PREREQ(2, 30) -# define _LIBCPP_HAS_COND_CLOCKWAIT 1 -# else -# define _LIBCPP_HAS_COND_CLOCKWAIT 0 -# endif -# else -# define _LIBCPP_HAS_COND_CLOCKWAIT 0 -# endif +# if !_LIBCPP_HAS_THREAD_API_PTHREAD +# define _LIBCPP_HAS_COND_CLOCKWAIT 0 +# elif (defined(__ANDROID__) && __ANDROID_API__ >= 30) || _LIBCPP_GLIBC_PREREQ(2, 30) +# define _LIBCPP_HAS_COND_CLOCKWAIT 1 # else # define _LIBCPP_HAS_COND_CLOCKWAIT 0 # endif @@ -951,8 +732,8 @@ typedef __char32_t char32_t; # endif # endif -# if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(__no_thread_safety_analysis__) -# define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((__no_thread_safety_analysis__)) +# if 
__has_cpp_attribute(_Clang::__no_thread_safety_analysis__) +# define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS [[_Clang::__no_thread_safety_analysis__]] # else # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS # endif @@ -1038,12 +819,8 @@ typedef __char32_t char32_t; // the latter depends on internal GNU libc details that are not appropriate // to depend on here, so any declarations present when __cpp_char8_t is not // defined are ignored. -# if defined(_LIBCPP_GLIBC_PREREQ) -# if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t) -# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1 -# else -# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 0 -# endif +# if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t) +# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1 # else # define _LIBCPP_HAS_C8RTOMB_MBRTOC8 0 # endif @@ -1067,8 +844,7 @@ typedef __char32_t char32_t; # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") # endif -// TODO(LLVM 22): Remove the workaround -# if defined(__OBJC__) && (!defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER < 2001) +# if defined(__OBJC__) && defined(_LIBCPP_APPLE_CLANG_VER) # define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS # endif @@ -1153,12 +929,33 @@ typedef __char32_t char32_t; # define _LIBCPP_DIAGNOSE_WARNING(...) # endif +# if __has_attribute(__diagnose_if__) && !defined(_LIBCPP_APPLE_CLANG_VER) && \ + (!defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER >= 2001) +# define _LIBCPP_DIAGNOSE_IF(...) __attribute__((__diagnose_if__(__VA_ARGS__))) +# else +# define _LIBCPP_DIAGNOSE_IF(...) 
+# endif + +# define _LIBCPP_DIAGNOSE_NULLPTR_IF(condition, condition_description) \ + _LIBCPP_DIAGNOSE_IF( \ + condition, \ + "null passed to callee that requires a non-null argument" condition_description, \ + "warning", \ + "nonnull") + # if __has_cpp_attribute(_Clang::__lifetimebound__) # define _LIBCPP_LIFETIMEBOUND [[_Clang::__lifetimebound__]] # else # define _LIBCPP_LIFETIMEBOUND # endif +// This is to work around https://llvm.org/PR156809 +# ifndef _LIBCPP_CXX03_LANG +# define _LIBCPP_CTOR_LIFETIMEBOUND _LIBCPP_LIFETIMEBOUND +# else +# define _LIBCPP_CTOR_LIFETIMEBOUND +# endif + # if __has_cpp_attribute(_Clang::__noescape__) # define _LIBCPP_NOESCAPE [[_Clang::__noescape__]] # else @@ -1172,12 +969,6 @@ typedef __char32_t char32_t; # define _LIBCPP_NO_SPECIALIZATIONS # endif -# if __has_cpp_attribute(_Clang::__standalone_debug__) -# define _LIBCPP_STANDALONE_DEBUG [[_Clang::__standalone_debug__]] -# else -# define _LIBCPP_STANDALONE_DEBUG -# endif - # if __has_cpp_attribute(_Clang::__preferred_name__) # define _LIBCPP_PREFERRED_NAME(x) [[_Clang::__preferred_name__(x)]] # else @@ -1257,14 +1048,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DIAGNOSE_NULLPTR # endif -// TODO(LLVM 22): Remove this macro once LLVM19 support ends. __cpp_explicit_this_parameter has been set in LLVM20. -// Clang-18 has support for deducing this, but it does not set the FTM. 
-# if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800) -# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 1 -# else -# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 0 -# endif - #endif // __cplusplus #endif // _LIBCPP___CONFIG diff --git a/lib/libcxx/include/__configuration/abi.h b/lib/libcxx/include/__configuration/abi.h index 2f7c548465..d9623df71d 100644 --- a/lib/libcxx/include/__configuration/abi.h +++ b/lib/libcxx/include/__configuration/abi.h @@ -61,19 +61,9 @@ // According to the Standard, `bitset::operator[] const` returns bool # define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL -// In LLVM 20, we've changed to take these ABI breaks unconditionally. These flags only exist in case someone is running -// into the static_asserts we added to catch the ABI break and don't care that it is one. -// TODO(LLVM 22): Remove these flags -# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB -# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB -# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB -# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB - // These flags are documented in ABIGuarantees.rst # define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT -# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON -# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON -# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10 +# define _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE # define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI # define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI # define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION @@ -84,25 +74,16 @@ # define _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE # define _LIBCPP_ABI_NO_ITERATOR_BASES # define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT +# define _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER # define _LIBCPP_ABI_OPTIMIZED_FUNCTION # define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO # define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION # define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_ARRAY # 
define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_STRING_VIEW # define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION +# define _LIBCPP_ABI_TRIVIALLY_COPYABLE_BIT_ITERATOR #elif _LIBCPP_ABI_VERSION == 1 -# if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) -// Enable compiling copies of now inline methods into the dylib to support -// applications compiled against older libraries. This is unnecessary with -// COFF dllexport semantics, since dllexport forces a non-inline definition -// of inline functions to be emitted anyway. Our own non-inline copy would -// conflict with the dllexport-emitted copy, so we disable it. For XCOFF, -// the linker will take issue with the symbols in the shared object if the -// weak inline methods get visibility (such as from -fvisibility-inlines-hidden), -// so disable it. -# define _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS -# endif // Feature macros for disabling pre ABI v1 features. All of these options // are deprecated. # if defined(__FreeBSD__) @@ -110,11 +91,20 @@ # endif #endif +// TODO(LLVM 22): Remove this check +#if defined(_LIBCPP_ABI_NO_ITERATOR_BASES) && !defined(_LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER) +# ifndef _LIBCPP_ONLY_NO_ITERATOR_BASES +# error "You probably want to define _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER. This has been split out from" \ + " _LIBCPP_ABI_NO_ITERATOR_BASES to allow only removing the second iterator member, since they aren't really related." \ + "If you actually want this ABI configuration, please define _LIBCPP_ONLY_NO_ITERATOR_BASES instead." 
+# endif +#endif + // We had some bugs where we use [[no_unique_address]] together with construct_at, // which causes UB as the call on construct_at could write to overlapping subobjects // -// https://github.com/llvm/llvm-project/issues/70506 -// https://github.com/llvm/llvm-project/issues/70494 +// https://llvm.org/PR70506 +// https://llvm.org/PR70494 // // To fix the bug we had to change the ABI of some classes to remove [[no_unique_address]] under certain conditions. // The macro below is used for all classes whose ABI have changed as part of fixing these bugs. diff --git a/lib/libcxx/include/__configuration/availability.h b/lib/libcxx/include/__configuration/availability.h index ae58e36b50..40e11b3314 100644 --- a/lib/libcxx/include/__configuration/availability.h +++ b/lib/libcxx/include/__configuration/availability.h @@ -17,62 +17,17 @@ # pragma GCC system_header #endif -// Libc++ is shipped by various vendors. In particular, it is used as a system -// library on macOS, iOS and other Apple platforms. In order for users to be -// able to compile a binary that is intended to be deployed to an older version -// of a platform, Clang provides availability attributes [1]. These attributes -// can be placed on declarations and are used to describe the life cycle of a -// symbol in the library. -// -// The main goal is to ensure a compile-time error if a symbol that hasn't been -// introduced in a previously released library is used in a program that targets -// that previously released library. Normally, this would be a load-time error -// when one tries to launch the program against the older library. -// -// For example, the filesystem library was introduced in the dylib in LLVM 9. -// On Apple platforms, this corresponds to macOS 10.15. 
If a user compiles on -// a macOS 10.15 host but targets macOS 10.13 with their program, the compiler -// would normally not complain (because the required declarations are in the -// headers), but the dynamic loader would fail to find the symbols when actually -// trying to launch the program on macOS 10.13. To turn this into a compile-time -// issue instead, declarations are annotated with when they were introduced, and -// the compiler can produce a diagnostic if the program references something that -// isn't available on the deployment target. -// -// This mechanism is general in nature, and any vendor can add their markup to -// the library (see below). Whenever a new feature is added that requires support -// in the shared library, two macros are added below to allow marking the feature -// as unavailable: -// 1. A macro named `_LIBCPP_AVAILABILITY_HAS_` which must be defined -// to `_LIBCPP_INTRODUCED_IN_` for the appropriate LLVM version. -// 2. A macro named `_LIBCPP_AVAILABILITY_`, which must be defined to -// `_LIBCPP_INTRODUCED_IN__MARKUP` for the appropriate LLVM version. -// -// When vendors decide to ship the feature as part of their shared library, they -// can update the `_LIBCPP_INTRODUCED_IN_` macro (and the markup counterpart) -// based on the platform version they shipped that version of LLVM in. The library -// will then use this markup to provide an optimal user experience on these platforms. -// -// Furthermore, many features in the standard library have corresponding -// feature-test macros. The `_LIBCPP_AVAILABILITY_HAS_` macros -// are checked by the corresponding feature-test macros generated by -// generate_feature_test_macro_components.py to ensure that the library -// doesn't announce a feature as being implemented if it is unavailable on -// the deployment target. -// -// Note that this mechanism is disabled by default in the "upstream" libc++. 
-// Availability annotations are only meaningful when shipping libc++ inside -// a platform (i.e. as a system library), and so vendors that want them should -// turn those annotations on at CMake configuration time. -// -// [1]: https://clang.llvm.org/docs/AttributeReference.html#availability +// This file defines a framework that can be used by vendors to encode the version of an operating system that various +// features of libc++ have been shipped in. This is primarily intended to allow safely deploying an executable built with +// a new version of the library on a platform containing an older version of the built library. +// Detailed documentation for this can be found at https://libcxx.llvm.org/VendorDocumentation.html#availability-markup // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. // // We also allow users to force-disable availability markup via the `_LIBCPP_DISABLE_AVAILABILITY` // macro because that is the only way to work around a Clang bug related to availability -// attributes: https://github.com/llvm/llvm-project/issues/134151. +// attributes: https://llvm.org/PR134151. // Once that bug has been fixed, we should remove the macro. #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ !defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_DISABLE_AVAILABILITY) @@ -84,6 +39,9 @@ // in all versions of the library are available. 
#if !_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS +# define _LIBCPP_INTRODUCED_IN_LLVM_22 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE /* nothing */ + # define _LIBCPP_INTRODUCED_IN_LLVM_21 1 # define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE /* nothing */ @@ -108,32 +66,55 @@ # define _LIBCPP_INTRODUCED_IN_LLVM_12 1 # define _LIBCPP_INTRODUCED_IN_LLVM_12_ATTRIBUTE /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_11 1 -# define _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE /* nothing */ - -# define _LIBCPP_INTRODUCED_IN_LLVM_9 1 -# define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP /* nothing */ - #elif defined(__APPLE__) // clang-format off +// LLVM 22 +// TODO: Fill this in +# define _LIBCPP_INTRODUCED_IN_LLVM_22 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE __attribute__((unavailable)) + // LLVM 21 // TODO: Fill this in # define _LIBCPP_INTRODUCED_IN_LLVM_21 0 # define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE __attribute__((unavailable)) // LLVM 20 -// TODO: Fill this in -# define _LIBCPP_INTRODUCED_IN_LLVM_20 0 -# define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE __attribute__((unavailable)) +// +// Note that versions for most Apple OSes were bumped forward and aligned in that release. 
+# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 260000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 260000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 260000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 260000) || \ + (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 100000) +# define _LIBCPP_INTRODUCED_IN_LLVM_20 0 +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_20 1 +# endif +# define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE \ + __attribute__((availability(macos, strict, introduced = 26.0))) \ + __attribute__((availability(ios, strict, introduced = 26.0))) \ + __attribute__((availability(tvos, strict, introduced = 26.0))) \ + __attribute__((availability(watchos, strict, introduced = 26.0))) \ + __attribute__((availability(bridgeos, strict, introduced = 10.0))) // LLVM 19 -// TODO: Fill this in -# define _LIBCPP_INTRODUCED_IN_LLVM_19 0 -# define _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE __attribute__((unavailable)) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 150400) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 180400) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 180400) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 110400) || \ + (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 90400) +# define _LIBCPP_INTRODUCED_IN_LLVM_19 0 +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_19 1 +# endif +# define 
_LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE \ + __attribute__((availability(macos, strict, introduced = 15.4))) \ + __attribute__((availability(ios, strict, introduced = 18.4))) \ + __attribute__((availability(tvos, strict, introduced = 18.4))) \ + __attribute__((availability(watchos, strict, introduced = 11.4))) \ + __attribute__((availability(bridgeos, strict, introduced = 9.4))) // LLVM 18 # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 150000) || \ @@ -215,47 +196,13 @@ __attribute__((availability(bridgeos, strict, introduced = 6.0))) \ __attribute__((availability(driverkit, strict, introduced = 21.3))) -// LLVM 11 -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) -# define _LIBCPP_INTRODUCED_IN_LLVM_11 0 -# else -# define _LIBCPP_INTRODUCED_IN_LLVM_11 1 +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80000) || \ + (defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 200000) +# warning "The selected platform is no longer supported by libc++." 
# endif -# define _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE \ - __attribute__((availability(macos, strict, introduced = 11.0))) \ - __attribute__((availability(ios, strict, introduced = 14.0))) \ - __attribute__((availability(tvos, strict, introduced = 14.0))) \ - __attribute__((availability(watchos, strict, introduced = 7.0))) - -// LLVM 9 -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) -# define _LIBCPP_INTRODUCED_IN_LLVM_9 0 -# else -# define _LIBCPP_INTRODUCED_IN_LLVM_9 1 -# endif -# define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE \ - __attribute__((availability(macos, strict, introduced = 10.15))) \ - __attribute__((availability(ios, strict, introduced = 13.0))) \ - __attribute__((availability(tvos, strict, introduced = 13.0))) \ - __attribute__((availability(watchos, strict, introduced = 6.0))) -# define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH \ - _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") -# define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") 
- -// clang-format on #else @@ -266,19 +213,12 @@ #endif -// These macros control the availability of all parts of that -// depend on something in the dylib. -#define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9 -#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE -#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH -#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP - -// This controls the availability of the C++20 synchronization library, -// which requires shared library support for various operations -// (see libcxx/src/atomic.cpp). This includes , , -// , and notification functions on std::atomic. -#define _LIBCPP_AVAILABILITY_HAS_SYNC _LIBCPP_INTRODUCED_IN_LLVM_11 -#define _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE +// This controls the availability of new implementation of std::atomic's +// wait, notify_one and notify_all. The new implementation uses +// the native atomic wait/notify operations on platforms that support them +// based on the size of the atomic type, instead of the type itself. +#define _LIBCPP_AVAILABILITY_HAS_NEW_SYNC _LIBCPP_INTRODUCED_IN_LLVM_22 +#define _LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE // Enable additional explicit instantiations of iostreams components. This // reduces the number of weak definitions generated in programs that use @@ -307,7 +247,7 @@ // This controls the availability of the C++17 std::pmr library, // which is implemented in large part in the built library. 
// -// TODO: Enable std::pmr markup once https://github.com/llvm/llvm-project/issues/40340 has been fixed +// TODO: Enable std::pmr markup once https://llvm.org/PR40340 has been fixed // Until then, it is possible for folks to try to use `std::pmr` when back-deploying to targets that don't support // it and it'll be a load-time error, but we don't have a good alternative because the library won't compile if we // use availability annotations until that bug has been fixed. @@ -364,4 +304,11 @@ # define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION #endif +// Only define a bunch of symbols in the dylib if we need to be compatible with LLVM 7 headers or older +# if defined(_LIBCPP_BUILDING_LIBRARY) && _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 8 +# define _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 +# else +# define _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 _LIBCPP_HIDE_FROM_ABI +# endif + #endif // _LIBCPP___CONFIGURATION_AVAILABILITY_H diff --git a/lib/libcxx/include/__configuration/compiler.h b/lib/libcxx/include/__configuration/compiler.h index 54025c5b22..302b7ced67 100644 --- a/lib/libcxx/include/__configuration/compiler.h +++ b/lib/libcxx/include/__configuration/compiler.h @@ -33,16 +33,16 @@ // Warn if a compiler version is used that is not supported anymore // LLVM RELEASE Update the minimum compiler versions # if defined(_LIBCPP_CLANG_VER) -# if _LIBCPP_CLANG_VER < 1900 -# warning "Libc++ only supports Clang 19 and later" +# if _LIBCPP_CLANG_VER < 2001 +# warning "Libc++ only supports Clang 20 and later" # endif # elif defined(_LIBCPP_APPLE_CLANG_VER) -# if _LIBCPP_APPLE_CLANG_VER < 1500 -# warning "Libc++ only supports AppleClang 15 and later" +# if _LIBCPP_APPLE_CLANG_VER < 1700 +# warning "Libc++ only supports AppleClang 26 and later" # endif # elif defined(_LIBCPP_GCC_VER) -# if _LIBCPP_GCC_VER < 1400 -# warning "Libc++ only supports GCC 14 and later" +# if _LIBCPP_GCC_VER < 1500 +# warning "Libc++ only supports GCC 15 and later" # endif # endif diff --git 
a/lib/libcxx/include/__configuration/experimental.h b/lib/libcxx/include/__configuration/experimental.h new file mode 100644 index 0000000000..c688b017da --- /dev/null +++ b/lib/libcxx/include/__configuration/experimental.h @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CONFIGURATION_EXPERIMENTAL_H +#define _LIBCPP___CONFIGURATION_EXPERIMENTAL_H + +/* zig patch: instead of including __config_site, zig adds -D flags when compiling */ + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +# pragma GCC system_header +#endif + +#if __has_feature(experimental_library) +# ifndef _LIBCPP_ENABLE_EXPERIMENTAL +# define _LIBCPP_ENABLE_EXPERIMENTAL +# endif +#endif + +// Incomplete features get their own specific disabling flags. This makes it +// easier to grep for target specific flags once the feature is complete. 
+#if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY) +# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1 +#else +# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0 +#endif + +#define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +#define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +#define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +#define _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +#define _LIBCPP_HAS_EXPERIMENTAL_OPTIONAL_ITERATOR _LIBCPP_HAS_EXPERIMENTAL_LIBRARY + +#endif // _LIBCPP___CONFIGURATION_EXPERIMENTAL_H diff --git a/lib/libcxx/include/__configuration/hardening.h b/lib/libcxx/include/__configuration/hardening.h new file mode 100644 index 0000000000..bd172d99ac --- /dev/null +++ b/lib/libcxx/include/__configuration/hardening.h @@ -0,0 +1,215 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CONFIGURATION_HARDENING_H +#define _LIBCPP___CONFIGURATION_HARDENING_H + +/* zig patch: instead of including __config_site, zig adds -D flags when compiling */ +#include <__configuration/experimental.h> +#include <__configuration/language.h> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +# pragma GCC system_header +#endif + +// TODO(LLVM 23): Remove this. We're making these an error to catch folks who might not have migrated. +// Since hardening went through several changes (many of which impacted user-facing macros), +// we're keeping these checks around for a bit longer than usual. Failure to properly configure +// hardening results in checks being dropped silently, which is a pretty big deal. 
+#if defined(_LIBCPP_ENABLE_ASSERTIONS) +# error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +#endif +#if defined(_LIBCPP_ENABLE_HARDENED_MODE) +# error "_LIBCPP_ENABLE_HARDENED_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +#endif +#if defined(_LIBCPP_ENABLE_SAFE_MODE) +# error "_LIBCPP_ENABLE_SAFE_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +#endif +#if defined(_LIBCPP_ENABLE_DEBUG_MODE) +# error "_LIBCPP_ENABLE_DEBUG_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +#endif + +// The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: +// +// - `_LIBCPP_HARDENING_MODE_NONE`; +// - `_LIBCPP_HARDENING_MODE_FAST`; +// - `_LIBCPP_HARDENING_MODE_EXTENSIVE`; +// - `_LIBCPP_HARDENING_MODE_DEBUG`. +// +// These values have the following effects: +// +// - `_LIBCPP_HARDENING_MODE_NONE` -- sets the hardening mode to "none" which disables all runtime hardening checks; +// +// - `_LIBCPP_HARDENING_MODE_FAST` -- sets that hardening mode to "fast". The fast mode enables security-critical checks +// that can be done with relatively little runtime overhead in constant time; +// +// - `_LIBCPP_HARDENING_MODE_EXTENSIVE` -- sets the hardening mode to "extensive". The extensive mode is a superset of +// the fast mode that additionally enables checks that are relatively cheap and prevent common types of logic errors +// but are not necessarily security-critical; +// +// - `_LIBCPP_HARDENING_MODE_DEBUG` -- sets the hardening mode to "debug". The debug mode is a superset of the extensive +// mode and enables all checks available in the library, including internal assertions. Checks that are part of the +// debug mode can be very expensive and thus the debug mode is intended to be used for testing, not in production. 
+ +// Inside the library, assertions are categorized so they can be cherry-picked based on the chosen hardening mode. These +// macros are only for internal use -- users should only pick one of the high-level hardening modes described above. +// +// - `_LIBCPP_ASSERT_VALID_INPUT_RANGE` -- checks that ranges (whether expressed as an iterator pair, an iterator and +// a sentinel, an iterator and a count, or a `std::range`) given as input to library functions are valid: +// - the sentinel is reachable from the begin iterator; +// - TODO(hardening): both iterators refer to the same container. +// +// - `_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS` -- checks that any attempts to access a container element, whether through +// the container object or through an iterator, are valid and do not attempt to go out of bounds or otherwise access +// a non-existent element. For iterator checks to work, bounded iterators must be enabled in the ABI. Types like +// `optional` and `function` are considered one-element containers for the purposes of this check. +// +// - `_LIBCPP_ASSERT_NON_NULL` -- checks that the pointer being dereferenced is not null. On most modern platforms zero +// address does not refer to an actual location in memory, so a null pointer dereference would not compromise the +// memory security of a program (however, it is still undefined behavior that can result in strange errors due to +// compiler optimizations). +// +// - `_LIBCPP_ASSERT_NON_OVERLAPPING_RANGES` -- for functions that take several ranges as arguments, checks that the +// given ranges do not overlap. +// +// - `_LIBCPP_ASSERT_VALID_DEALLOCATION` -- checks that an attempt to deallocate memory is valid (e.g. the given object +// was allocated by the given allocator). Violating this category typically results in a memory leak. +// +// - `_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL` -- checks that a call to an external API doesn't fail in +// an unexpected manner. 
This includes triggering documented cases of undefined behavior in an external library (like +// attempting to unlock an unlocked mutex in pthreads). Any API external to the library falls under this category +// (from system calls to compiler intrinsics). We generally don't expect these failures to compromize memory safety or +// otherwise create an immediate security issue. +// +// - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure +// the containers have compatible allocators. +// +// - `_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN` -- checks that the given argument is within the domain of valid arguments +// for the function. Violating this typically produces an incorrect result (e.g. the clamp algorithm returns the +// original value without clamping it due to incorrect functors) or puts an object into an invalid state (e.g. +// a string view where only a subset of elements is possible to access). This category is for assertions violating +// which doesn't cause any immediate issues in the library -- whatever the consequences are, they will happen in the +// user code. +// +// - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to +// be benign in our implementation. +// +// - `_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT` -- checks that the given argument satisfies the semantic requirements imposed +// by the Standard. Typically, there is no simple way to completely prove that a semantic requirement is satisfied; +// thus, this would often be a heuristic check and it might be quite expensive. +// +// - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on +// user input. +// +// - `_LIBCPP_ASSERT_UNCATEGORIZED` -- for assertions that haven't been properly classified yet. 
+ +// clang-format off +# define _LIBCPP_HARDENING_MODE_NONE (1 << 1) +# define _LIBCPP_HARDENING_MODE_FAST (1 << 2) +# define _LIBCPP_HARDENING_MODE_EXTENSIVE (1 << 4) // Deliberately not ordered. +# define _LIBCPP_HARDENING_MODE_DEBUG (1 << 3) +// clang-format on + +#ifndef _LIBCPP_HARDENING_MODE + +# ifndef _LIBCPP_HARDENING_MODE_DEFAULT +# error _LIBCPP_HARDENING_MODE_DEFAULT is not defined. This definition should be set at configuration time in the \ +`__config_site` header, please make sure your installation of libc++ is not broken. +# endif + +# define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_DEFAULT +#endif + +#if _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_NONE && _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_FAST && \ + _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_EXTENSIVE && \ + _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_DEBUG +# error _LIBCPP_HARDENING_MODE must be set to one of the following values: \ +_LIBCPP_HARDENING_MODE_NONE, \ +_LIBCPP_HARDENING_MODE_FAST, \ +_LIBCPP_HARDENING_MODE_EXTENSIVE, \ +_LIBCPP_HARDENING_MODE_DEBUG +#endif + +// The library provides the macro `_LIBCPP_ASSERTION_SEMANTIC` for configuring the assertion semantic used by hardening; +// it can be set to one of the following values: +// +// - `_LIBCPP_ASSERTION_SEMANTIC_IGNORE`; +// - `_LIBCPP_ASSERTION_SEMANTIC_OBSERVE`; +// - `_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE`; +// - `_LIBCPP_ASSERTION_SEMANTIC_ENFORCE`. +// +// libc++ assertion semantics generally mirror the evaluation semantics of C++26 Contracts: +// - `ignore` evaluates the assertion but doesn't do anything if it fails (note that it differs from the Contracts +// `ignore` semantic which wouldn't evaluate the assertion at all); +// - `observe` logs an error (indicating, if possible, that the error is fatal) and continues execution; +// - `quick-enforce` terminates the program as fast as possible (via trapping); +// - `enforce` logs an error and then terminates the program. 
+// +// Additionally, a special `hardening-dependent` value selects the assertion semantic based on the hardening mode in +// effect: the production-capable modes (`fast` and `extensive`) map to `quick_enforce` and the `debug` mode maps to +// `enforce`. The `hardening-dependent` semantic cannot be selected explicitly, it is only used when no assertion +// semantic is provided by the user _and_ the library's default semantic is configured to be dependent on hardening. +// +// Notes: +// - Continuing execution after a hardening check fails results in undefined behavior; the `observe` semantic is meant +// to make adopting hardening easier but should not be used outside of this scenario; +// - C++26 wording for Library Hardening precludes a conforming Hardened implementation from using the Contracts +// `ignore` semantic when evaluating hardened preconditions in the Library. Libc++ allows using this semantic for +// hardened preconditions, however, be aware that using `ignore` does not produce a conforming "Hardened" +// implementation, unlike the other semantics above. +// clang-format off +# define _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT (1 << 1) +# define _LIBCPP_ASSERTION_SEMANTIC_IGNORE (1 << 2) +# define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE (1 << 3) +# define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (1 << 4) +# define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE (1 << 5) +// clang-format on + +// If the user attempts to configure the assertion semantic, check that it is allowed in the current environment. +#if defined(_LIBCPP_ASSERTION_SEMANTIC) +# if !_LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# error "Assertion semantics are an experimental feature." +# endif +# if defined(_LIBCPP_CXX03_LANG) +# error "Assertion semantics are not available in the C++03 mode." +# endif +#endif // defined(_LIBCPP_ASSERTION_SEMANTIC) + +// User-provided semantic takes top priority -- don't override if set. 
+#ifndef _LIBCPP_ASSERTION_SEMANTIC + +# ifndef _LIBCPP_ASSERTION_SEMANTIC_DEFAULT +# error _LIBCPP_ASSERTION_SEMANTIC_DEFAULT is not defined. This definition should be set at configuration time in \ +the `__config_site` header, please make sure your installation of libc++ is not broken. +# endif + +# if _LIBCPP_ASSERTION_SEMANTIC_DEFAULT != _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_DEFAULT +# else +# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# else +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE +# endif +# endif // _LIBCPP_ASSERTION_SEMANTIC_DEFAULT != _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT + +#endif // #ifndef _LIBCPP_ASSERTION_SEMANTIC + +// Finally, validate the selected semantic (in case the user tries setting it to an incorrect value): +#if _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_IGNORE && \ + _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_OBSERVE && \ + _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE && \ + _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# error _LIBCPP_ASSERTION_SEMANTIC must be set to one of the following values: \ +_LIBCPP_ASSERTION_SEMANTIC_IGNORE, \ +_LIBCPP_ASSERTION_SEMANTIC_OBSERVE, \ +_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE, \ +_LIBCPP_ASSERTION_SEMANTIC_ENFORCE +#endif + +#endif // _LIBCPP___CONFIGURATION_HARDENING_H diff --git a/lib/libcxx/include/__configuration/language.h b/lib/libcxx/include/__configuration/language.h index 6cf5805f2b..6fef4f396b 100644 --- a/lib/libcxx/include/__configuration/language.h +++ b/lib/libcxx/include/__configuration/language.h @@ -18,6 +18,9 @@ // NOLINTBEGIN(libcpp-cpp-version-check) #ifdef __cplusplus +# if __cplusplus < 201103L +# define _LIBCPP_CXX03_LANG +# endif # if __cplusplus <= 201103L # define _LIBCPP_STD_VER 11 # elif 
__cplusplus <= 201402L diff --git a/lib/libcxx/include/__configuration/platform.h b/lib/libcxx/include/__configuration/platform.h index 1a83b0dc27..7492346fa1 100644 --- a/lib/libcxx/include/__configuration/platform.h +++ b/lib/libcxx/include/__configuration/platform.h @@ -31,22 +31,15 @@ #endif // Need to detect which libc we're using if we're on Linux. -#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__) -# if __has_include() -# include -# if defined(__GLIBC_PREREQ) -# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) -# else -# define _LIBCPP_GLIBC_PREREQ(a, b) 0 -# endif // defined(__GLIBC_PREREQ) -# endif -#endif - -// This is required in order for _NEWLIB_VERSION to be defined in places where we use it. -// TODO: We shouldn't be including arbitrarily-named headers from libc++ since this can break valid -// user code. Move code paths that need _NEWLIB_VERSION to another customization mechanism. -#if __has_include() -# include +#if (defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)) && __has_include() +# include +# if defined(__GLIBC_PREREQ) +# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) +# else +# define _LIBCPP_GLIBC_PREREQ(a, b) 0 +# endif // defined(__GLIBC_PREREQ) +#else +# define _LIBCPP_GLIBC_PREREQ(a, b) 0 #endif #ifndef __BYTE_ORDER__ diff --git a/lib/libcxx/include/__coroutine/coroutine_handle.h b/lib/libcxx/include/__coroutine/coroutine_handle.h index b7add25851..b26a650748 100644 --- a/lib/libcxx/include/__coroutine/coroutine_handle.h +++ b/lib/libcxx/include/__coroutine/coroutine_handle.h @@ -44,9 +44,9 @@ public: } // [coroutine.handle.export.import], export/import - _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; } - _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr 
coroutine_handle from_address(void* __addr) noexcept { coroutine_handle __tmp; __tmp.__handle_ = __addr; return __tmp; @@ -55,7 +55,7 @@ public: // [coroutine.handle.observers], observers _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return __handle_ != nullptr; } - _LIBCPP_HIDE_FROM_ABI bool done() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool done() const { _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(__is_suspended(), "done() can be called only on suspended coroutines"); return __builtin_coro_done(__handle_); } @@ -100,7 +100,7 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr coroutine_handle(nullptr_t) noexcept {} - _LIBCPP_HIDE_FROM_ABI static coroutine_handle from_promise(_Promise& __promise) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static coroutine_handle from_promise(_Promise& __promise) { using _RawPromise = __remove_cv_t<_Promise>; coroutine_handle __tmp; __tmp.__handle_ = @@ -114,9 +114,9 @@ public: } // [coroutine.handle.export.import], export/import - _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; } - _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept { coroutine_handle __tmp; __tmp.__handle_ = __addr; return __tmp; @@ -130,7 +130,7 @@ public: // [coroutine.handle.observers], observers _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return __handle_ != nullptr; } - _LIBCPP_HIDE_FROM_ABI bool done() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool done() const { _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(__is_suspended(), "done() can be called only on suspended coroutines"); return __builtin_coro_done(__handle_); } @@ -150,7 +150,7 @@ public: } // [coroutine.handle.promise], promise access - _LIBCPP_HIDE_FROM_ABI _Promise& 
promise() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Promise& promise() const { return *static_cast<_Promise*>(__builtin_coro_promise(this->__handle_, alignof(_Promise), false)); } @@ -165,7 +165,7 @@ private: // [coroutine.handle.hash] template struct hash> { - _LIBCPP_HIDE_FROM_ABI size_t operator()(const coroutine_handle<_Tp>& __v) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const coroutine_handle<_Tp>& __v) const noexcept { return hash()(__v.address()); } }; diff --git a/lib/libcxx/include/__coroutine/noop_coroutine_handle.h b/lib/libcxx/include/__coroutine/noop_coroutine_handle.h index 2b2838b6bf..b9c54d3b42 100644 --- a/lib/libcxx/include/__coroutine/noop_coroutine_handle.h +++ b/lib/libcxx/include/__coroutine/noop_coroutine_handle.h @@ -20,8 +20,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD -# if __has_builtin(__builtin_coro_noop) || defined(_LIBCPP_COMPILER_GCC) - // [coroutine.noop] // [coroutine.promise.noop] struct noop_coroutine_promise {}; @@ -37,7 +35,7 @@ public: // [coroutine.handle.noop.observers], observers _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return true; } - _LIBCPP_HIDE_FROM_ABI constexpr bool done() const noexcept { return false; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool done() const noexcept { return false; } // [coroutine.handle.noop.resumption], resumption _LIBCPP_HIDE_FROM_ABI constexpr void operator()() const noexcept {} @@ -45,23 +43,23 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr void destroy() const noexcept {} // [coroutine.handle.noop.promise], promise access - _LIBCPP_HIDE_FROM_ABI noop_coroutine_promise& promise() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI noop_coroutine_promise& promise() const noexcept { return *static_cast( __builtin_coro_promise(this->__handle_, alignof(noop_coroutine_promise), false)); } // [coroutine.handle.noop.address], address - _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; } + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; } private: _LIBCPP_HIDE_FROM_ABI friend coroutine_handle noop_coroutine() noexcept; -# if __has_builtin(__builtin_coro_noop) +# if __has_builtin(__builtin_coro_noop) _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept { this->__handle_ = __builtin_coro_noop(); } void* __handle_ = nullptr; -# elif defined(_LIBCPP_COMPILER_GCC) +# elif defined(_LIBCPP_COMPILER_GCC) // GCC doesn't implement __builtin_coro_noop(). // Construct the coroutine frame manually instead. struct __noop_coroutine_frame_ty_ { @@ -78,19 +76,19 @@ private: _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept = default; -# endif // __has_builtin(__builtin_coro_noop) +# endif // __has_builtin(__builtin_coro_noop) }; using noop_coroutine_handle = coroutine_handle; -# if defined(_LIBCPP_COMPILER_GCC) +# if defined(_LIBCPP_COMPILER_GCC) inline noop_coroutine_handle::__noop_coroutine_frame_ty_ noop_coroutine_handle::__noop_coroutine_frame_{}; -# endif +# endif // [coroutine.noop.coroutine] -inline _LIBCPP_HIDE_FROM_ABI noop_coroutine_handle noop_coroutine() noexcept { return noop_coroutine_handle(); } - -# endif // __has_builtin(__builtin_coro_noop) || defined(_LIBCPP_COMPILER_GCC) +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI noop_coroutine_handle noop_coroutine() noexcept { + return noop_coroutine_handle(); +} _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h b/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h index 3a9d887284..3724ca95c5 100644 --- a/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h +++ b/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h @@ -27,7 +27,7 @@ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __check_strict_weak_ordering_sorted(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) { #if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG - using __diff_t = 
__iter_diff_t<_RandomAccessIterator>; + using __diff_t = __iterator_difference_type<_RandomAccessIterator>; using _Comp_ref = __comp_ref_type<_Comp>; if (!__libcpp_is_constant_evaluated()) { // Check if the range is actually sorted. diff --git a/lib/libcxx/include/__exception/exception.h b/lib/libcxx/include/__exception/exception.h index f7dab6e83a..ddc34b0fa8 100644 --- a/lib/libcxx/include/__exception/exception.h +++ b/lib/libcxx/include/__exception/exception.h @@ -48,13 +48,15 @@ public: __data_._DoFree = true; } - exception(exception const&) _NOEXCEPT {} + exception(exception const&) _NOEXCEPT : __data_() {} exception& operator=(exception const&) _NOEXCEPT { return *this; } virtual ~exception() _NOEXCEPT {} - virtual char const* what() const _NOEXCEPT { return __data_._What ? __data_._What : "Unknown exception"; } + [[__nodiscard__]] virtual char const* what() const _NOEXCEPT { + return __data_._What ? __data_._What : "Unknown exception"; + } private: __std_exception_data __data_; @@ -76,7 +78,7 @@ public: _LIBCPP_HIDE_FROM_ABI exception& operator=(const exception&) _NOEXCEPT = default; virtual ~exception() _NOEXCEPT; - virtual const char* what() const _NOEXCEPT; + [[__nodiscard__]] virtual const char* what() const _NOEXCEPT; }; class _LIBCPP_EXPORTED_FROM_ABI bad_exception : public exception { @@ -85,7 +87,7 @@ public: _LIBCPP_HIDE_FROM_ABI bad_exception(const bad_exception&) _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI bad_exception& operator=(const bad_exception&) _NOEXCEPT = default; ~bad_exception() _NOEXCEPT override; - const char* what() const _NOEXCEPT override; + [[__nodiscard__]] const char* what() const _NOEXCEPT override; }; #endif // !_LIBCPP_ABI_VCRUNTIME diff --git a/lib/libcxx/include/__exception/exception_ptr.h b/lib/libcxx/include/__exception/exception_ptr.h index 796fa924be..92ff5c701e 100644 --- a/lib/libcxx/include/__exception/exception_ptr.h +++ b/lib/libcxx/include/__exception/exception_ptr.h @@ -11,18 +11,24 @@ #include <__config> #include 
<__cstddef/nullptr_t.h> +#include <__cstddef/size_t.h> #include <__exception/operations.h> #include <__memory/addressof.h> #include <__memory/construct_at.h> #include <__type_traits/decay.h> #include <__type_traits/is_pointer.h> -#include +#include <__utility/move.h> +#include <__utility/swap.h> +#include <__verbose_abort> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + #ifndef _LIBCPP_ABI_MICROSOFT # if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION @@ -30,7 +36,7 @@ namespace __cxxabiv1 { extern "C" { -_LIBCPP_OVERRIDABLE_FUNC_VIS void* __cxa_allocate_exception(size_t) throw(); +_LIBCPP_OVERRIDABLE_FUNC_VIS void* __cxa_allocate_exception(std::size_t) throw(); _LIBCPP_OVERRIDABLE_FUNC_VIS void __cxa_free_exception(void*) throw(); struct __cxa_exception; @@ -57,6 +63,8 @@ _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD #ifndef _LIBCPP_ABI_MICROSOFT +inline _LIBCPP_HIDE_FROM_ABI void swap(exception_ptr& __x, exception_ptr& __y) _NOEXCEPT; + class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { void* __ptr_; @@ -67,15 +75,21 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { public: // exception_ptr is basically a COW string so it is trivially relocatable. - // It is also replaceable because assignment has normal value semantics. 
using __trivially_relocatable _LIBCPP_NODEBUG = exception_ptr; - using __replaceable _LIBCPP_NODEBUG = exception_ptr; _LIBCPP_HIDE_FROM_ABI exception_ptr() _NOEXCEPT : __ptr_() {} _LIBCPP_HIDE_FROM_ABI exception_ptr(nullptr_t) _NOEXCEPT : __ptr_() {} exception_ptr(const exception_ptr&) _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI exception_ptr(exception_ptr&& __other) _NOEXCEPT : __ptr_(__other.__ptr_) { + __other.__ptr_ = nullptr; + } exception_ptr& operator=(const exception_ptr&) _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI exception_ptr& operator=(exception_ptr&& __other) _NOEXCEPT { + exception_ptr __tmp(std::move(__other)); + std::swap(__tmp, *this); + return *this; + } ~exception_ptr() _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __ptr_ != nullptr; } @@ -88,10 +102,16 @@ public: return !(__x == __y); } + friend _LIBCPP_HIDE_FROM_ABI void swap(exception_ptr& __x, exception_ptr& __y) _NOEXCEPT; + friend _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT; friend _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); }; +inline _LIBCPP_HIDE_FROM_ABI void swap(exception_ptr& __x, exception_ptr& __y) _NOEXCEPT { + std::swap(__x.__ptr_, __y.__ptr_); +} + # if _LIBCPP_HAS_EXCEPTIONS # if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION template @@ -153,7 +173,7 @@ _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { # else // !_LIBCPP_HAS_EXCEPTIONS template _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep) _NOEXCEPT { - std::abort(); + _LIBCPP_VERBOSE_ABORT("make_exception_ptr was called in -fno-exceptions mode"); } # endif // _LIBCPP_HAS_EXCEPTIONS @@ -201,4 +221,6 @@ _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { #endif // _LIBCPP_ABI_MICROSOFT _LIBCPP_END_UNVERSIONED_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___EXCEPTION_EXCEPTION_PTR_H diff --git a/lib/libcxx/include/__exception/nested_exception.h 
b/lib/libcxx/include/__exception/nested_exception.h index 90b14158d5..dd84efbccd 100644 --- a/lib/libcxx/include/__exception/nested_exception.h +++ b/lib/libcxx/include/__exception/nested_exception.h @@ -40,7 +40,7 @@ public: // access functions [[__noreturn__]] void rethrow_nested() const; - _LIBCPP_HIDE_FROM_ABI exception_ptr nested_ptr() const _NOEXCEPT { return __ptr_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI exception_ptr nested_ptr() const _NOEXCEPT { return __ptr_; } }; template @@ -73,7 +73,7 @@ template __throw_with_nested<_Tp, _Up, is_class<_Up>::value && !is_base_of::value && - !__libcpp_is_final<_Up>::value>::__do_throw(std::forward<_Tp>(__t)); + !__is_final_v<_Up> >::__do_throw(std::forward<_Tp>(__t)); #else ((void)__t); // FIXME: Make this abort diff --git a/lib/libcxx/include/__exception/operations.h b/lib/libcxx/include/__exception/operations.h index 29d5c698a9..2b93ad260c 100644 --- a/lib/libcxx/include/__exception/operations.h +++ b/lib/libcxx/include/__exception/operations.h @@ -20,22 +20,22 @@ _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD defined(_LIBCPP_BUILDING_LIBRARY) using unexpected_handler = void (*)(); _LIBCPP_EXPORTED_FROM_ABI unexpected_handler set_unexpected(unexpected_handler) _NOEXCEPT; -_LIBCPP_EXPORTED_FROM_ABI unexpected_handler get_unexpected() _NOEXCEPT; +[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI unexpected_handler get_unexpected() _NOEXCEPT; [[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void unexpected(); #endif using terminate_handler = void (*)(); _LIBCPP_EXPORTED_FROM_ABI terminate_handler set_terminate(terminate_handler) _NOEXCEPT; -_LIBCPP_EXPORTED_FROM_ABI terminate_handler get_terminate() _NOEXCEPT; +[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI terminate_handler get_terminate() _NOEXCEPT; #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION) -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 bool uncaught_exception() _NOEXCEPT; +[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI 
_LIBCPP_DEPRECATED_IN_CXX17 bool uncaught_exception() _NOEXCEPT; #endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION) -_LIBCPP_EXPORTED_FROM_ABI int uncaught_exceptions() _NOEXCEPT; +[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI int uncaught_exceptions() _NOEXCEPT; class _LIBCPP_EXPORTED_FROM_ABI exception_ptr; -_LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT; +[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT; [[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); _LIBCPP_END_UNVERSIONED_NAMESPACE_STD diff --git a/lib/libcxx/include/__expected/bad_expected_access.h b/lib/libcxx/include/__expected/bad_expected_access.h index 1b734389e8..b1958101d5 100644 --- a/lib/libcxx/include/__expected/bad_expected_access.h +++ b/lib/libcxx/include/__expected/bad_expected_access.h @@ -43,9 +43,11 @@ protected: public: # if _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION - const char* what() const noexcept override; + [[nodiscard]] const char* what() const noexcept override; # else - _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { return "bad access to std::expected"; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { + return "bad access to std::expected"; + } # endif }; _LIBCPP_DIAGNOSTIC_POP @@ -55,10 +57,10 @@ class bad_expected_access : public bad_expected_access { public: _LIBCPP_HIDE_FROM_ABI explicit bad_expected_access(_Err __e) : __unex_(std::move(__e)) {} - _LIBCPP_HIDE_FROM_ABI _Err& error() & noexcept { return __unex_; } - _LIBCPP_HIDE_FROM_ABI const _Err& error() const& noexcept { return __unex_; } - _LIBCPP_HIDE_FROM_ABI _Err&& error() && noexcept { return std::move(__unex_); } - _LIBCPP_HIDE_FROM_ABI const _Err&& error() const&& noexcept { return std::move(__unex_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Err& error() & noexcept { return __unex_; } + 
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI const _Err& error() const& noexcept { return __unex_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Err&& error() && noexcept { return std::move(__unex_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const _Err&& error() const&& noexcept { return std::move(__unex_); } private: _Err __unex_; diff --git a/lib/libcxx/include/__expected/expected.h b/lib/libcxx/include/__expected/expected.h index 0f446b8707..24ae33d4e3 100644 --- a/lib/libcxx/include/__expected/expected.h +++ b/lib/libcxx/include/__expected/expected.h @@ -30,7 +30,6 @@ #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_reference.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_constructible.h> @@ -472,8 +471,6 @@ public: __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value && __libcpp_is_trivially_relocatable<_Err>::value, expected, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v<_Tp> && __is_replaceable_v<_Err>, expected, void>; template using rebind = expected<_Up, error_type>; @@ -555,9 +552,10 @@ public: is_nothrow_constructible_v<_Tp, _Up> && is_nothrow_constructible_v<_Err, _OtherErr>) // strengthened : __base(__other.__has_val(), std::move(__other.__union())) {} - template + template > requires(!is_same_v, in_place_t> && !is_same_v> && - is_constructible_v<_Tp, _Up> && !__is_std_unexpected>::value && + !is_same_v, unexpect_t> && is_constructible_v<_Tp, _Up> && + !__is_std_unexpected>::value && (!is_same_v, bool> || !__is_std_expected>::value)) _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_Up, _Tp>) expected(_Up&& __u) noexcept(is_nothrow_constructible_v<_Tp, _Up>) // strengthened @@ -668,7 +666,7 @@ public: return *this; } - template + template > _LIBCPP_HIDE_FROM_ABI constexpr expected& operator=(_Up&& __v) requires(!is_same_v> 
&& !__is_std_unexpected>::value && is_constructible_v<_Tp, _Up> && is_assignable_v<_Tp&, _Up> && @@ -800,25 +798,25 @@ public: return std::addressof(this->__val()); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( this->__has_val(), "expected::operator* requires the expected to contain a value"); return this->__val(); } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( this->__has_val(), "expected::operator* requires the expected to contain a value"); return this->__val(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( this->__has_val(), "expected::operator* requires the expected to contain a value"); return std::move(this->__val()); } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( this->__has_val(), "expected::operator* requires the expected to contain a value"); return std::move(this->__val()); @@ -826,9 +824,9 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return this->__has_val(); } - _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& value() const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& value() const& { static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible"); if (!this->__has_val()) { 
std::__throw_bad_expected_access<_Err>(std::as_const(error())); @@ -836,7 +834,7 @@ public: return this->__val(); } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() & { static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible"); if (!this->__has_val()) { std::__throw_bad_expected_access<_Err>(std::as_const(error())); @@ -844,7 +842,7 @@ public: return this->__val(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& value() const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& value() const&& { static_assert(is_copy_constructible_v<_Err> && is_constructible_v<_Err, decltype(std::move(error()))>, "error_type has to be both copy constructible and constructible from decltype(std::move(error()))"); if (!this->__has_val()) { @@ -853,7 +851,7 @@ public: return std::move(this->__val()); } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() && { static_assert(is_copy_constructible_v<_Err> && is_constructible_v<_Err, decltype(std::move(error()))>, "error_type has to be both copy constructible and constructible from decltype(std::move(error()))"); if (!this->__has_val()) { @@ -862,46 +860,46 @@ public: return std::move(this->__val()); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return this->__unex(); } - _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return this->__unex(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return std::move(this->__unex()); } - _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return std::move(this->__unex()); } - template - _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& { + template > + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& { static_assert(is_copy_constructible_v<_Tp>, "value_type has to be copy constructible"); static_assert(is_convertible_v<_Up, _Tp>, "argument has to be convertible to value_type"); return this->__has_val() ? this->__val() : static_cast<_Tp>(std::forward<_Up>(__v)); } - template - _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && { + template > + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && { static_assert(is_move_constructible_v<_Tp>, "value_type has to be move constructible"); static_assert(is_convertible_v<_Up, _Tp>, "argument has to be convertible to value_type"); return this->__has_val() ? 
std::move(this->__val()) : static_cast<_Tp>(std::forward<_Up>(__v)); } template - _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& { static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible"); static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type"); if (has_value()) @@ -910,7 +908,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && { static_assert(is_move_constructible_v<_Err>, "error_type has to be move constructible"); static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type"); if (has_value()) @@ -921,7 +919,7 @@ public: // [expected.void.monadic], monadic template requires is_constructible_v<_Err, _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { using _Up = remove_cvref_t>; static_assert(__is_std_expected<_Up>::value, "The result of f(value()) must be a specialization of std::expected"); static_assert(is_same_v, @@ -934,7 +932,7 @@ public: template requires is_constructible_v<_Err, const _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& { using _Up = remove_cvref_t>; static_assert(__is_std_expected<_Up>::value, "The result of f(value()) must be a specialization of std::expected"); static_assert(is_same_v, @@ -947,7 +945,7 @@ public: template requires is_constructible_v<_Err, _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { using _Up = remove_cvref_t>; static_assert( __is_std_expected<_Up>::value, "The result of f(std::move(value())) 
must be a specialization of std::expected"); @@ -961,7 +959,7 @@ public: template requires is_constructible_v<_Err, const _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { using _Up = remove_cvref_t>; static_assert( __is_std_expected<_Up>::value, "The result of f(std::move(value())) must be a specialization of std::expected"); @@ -975,7 +973,7 @@ public: template requires is_constructible_v<_Tp, _Tp&> - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & { using _Gp = remove_cvref_t>; static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected"); static_assert(is_same_v, @@ -988,7 +986,7 @@ public: template requires is_constructible_v<_Tp, const _Tp&> - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& { using _Gp = remove_cvref_t>; static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected"); static_assert(is_same_v, @@ -1001,7 +999,7 @@ public: template requires is_constructible_v<_Tp, _Tp&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && { using _Gp = remove_cvref_t>; static_assert( __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected"); @@ -1015,7 +1013,7 @@ public: template requires is_constructible_v<_Tp, const _Tp&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& { using _Gp = remove_cvref_t>; static_assert( __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of 
std::expected"); @@ -1029,7 +1027,7 @@ public: template requires is_constructible_v<_Err, _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, error()); @@ -1045,7 +1043,7 @@ public: template requires is_constructible_v<_Err, const _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, error()); @@ -1061,7 +1059,7 @@ public: template requires is_constructible_v<_Err, _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, std::move(error())); @@ -1077,7 +1075,7 @@ public: template requires is_constructible_v<_Err, const _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, std::move(error())); @@ -1093,7 +1091,7 @@ public: template requires is_constructible_v<_Tp, _Tp&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & { using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(error()) must be a valid template argument for unexpected"); @@ -1105,7 +1103,7 @@ public: template requires is_constructible_v<_Tp, const _Tp&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& { 
using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(error()) must be a valid template argument for unexpected"); @@ -1117,7 +1115,7 @@ public: template requires is_constructible_v<_Tp, _Tp&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && { using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(std::move(error())) must be a valid template argument for unexpected"); @@ -1130,7 +1128,7 @@ public: template requires is_constructible_v<_Tp, const _Tp&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& { using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(std::move(error())) must be a valid template argument for unexpected"); @@ -1597,7 +1595,7 @@ public: // [expected.void.obs], observers _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return this->__has_val(); } - _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); } _LIBCPP_HIDE_FROM_ABI constexpr void operator*() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( @@ -1618,32 +1616,32 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return this->__unex(); } - _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), 
"expected::error requires the expected to contain an error"); return this->__unex(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return std::move(this->__unex()); } - _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( !this->__has_val(), "expected::error requires the expected to contain an error"); return std::move(this->__unex()); } template - _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& { static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible"); static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type"); if (has_value()) { @@ -1653,7 +1651,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && { static_assert(is_move_constructible_v<_Err>, "error_type has to be move constructible"); static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type"); if (has_value()) { @@ -1665,7 +1663,7 @@ public: // [expected.void.monadic], monadic template requires is_constructible_v<_Err, _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { using _Up = remove_cvref_t>; static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected"); static_assert( @@ -1678,7 +1676,7 @@ public: template requires is_constructible_v<_Err, const _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto 
and_then(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& { using _Up = remove_cvref_t>; static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected"); static_assert( @@ -1691,7 +1689,7 @@ public: template requires is_constructible_v<_Err, _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { using _Up = remove_cvref_t>; static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected"); static_assert( @@ -1704,7 +1702,7 @@ public: template requires is_constructible_v<_Err, const _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { using _Up = remove_cvref_t>; static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected"); static_assert( @@ -1716,7 +1714,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & { using _Gp = remove_cvref_t>; static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected"); static_assert(is_same_v, @@ -1728,7 +1726,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& { using _Gp = remove_cvref_t>; static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected"); static_assert(is_same_v, @@ -1740,7 +1738,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && { using _Gp = remove_cvref_t>; static_assert( 
__is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected"); @@ -1753,7 +1751,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& { using _Gp = remove_cvref_t>; static_assert( __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected"); @@ -1767,7 +1765,7 @@ public: template requires is_constructible_v<_Err, _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, error()); @@ -1782,7 +1780,7 @@ public: template requires is_constructible_v<_Err, const _Err&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, error()); @@ -1797,7 +1795,7 @@ public: template requires is_constructible_v<_Err, _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, std::move(error())); @@ -1812,7 +1810,7 @@ public: template requires is_constructible_v<_Err, const _Err&&> - _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& { using _Up = remove_cv_t>; if (!has_value()) { return expected<_Up, _Err>(unexpect, std::move(error())); @@ -1826,7 +1824,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & { 
using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(error()) must be a valid template argument for unexpected"); @@ -1837,7 +1835,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& { using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(error()) must be a valid template argument for unexpected"); @@ -1848,7 +1846,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && { using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(std::move(error())) must be a valid template argument for unexpected"); @@ -1860,7 +1858,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& { using _Gp = remove_cv_t>; static_assert(__valid_std_unexpected<_Gp>::value, "The result of f(std::move(error())) must be a valid template argument for unexpected"); diff --git a/lib/libcxx/include/__expected/unexpected.h b/lib/libcxx/include/__expected/unexpected.h index 6904889b8c..fc4f52ce14 100644 --- a/lib/libcxx/include/__expected/unexpected.h +++ b/lib/libcxx/include/__expected/unexpected.h @@ -89,10 +89,10 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr unexpected& operator=(const unexpected&) = default; _LIBCPP_HIDE_FROM_ABI constexpr unexpected& operator=(unexpected&&) = default; - _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { return __unex_; } - _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { return __unex_; } - _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { return std::move(__unex_); } - _LIBCPP_HIDE_FROM_ABI constexpr _Err&& 
error() && noexcept { return std::move(__unex_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { return __unex_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { return __unex_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { return std::move(__unex_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { return std::move(__unex_); } _LIBCPP_HIDE_FROM_ABI constexpr void swap(unexpected& __other) noexcept(is_nothrow_swappable_v<_Err>) { static_assert(is_swappable_v<_Err>, "unexpected::swap requires is_swappable_v to be true"); diff --git a/lib/libcxx/include/__filesystem/copy_options.h b/lib/libcxx/include/__filesystem/copy_options.h index 097eebe611..d9039a6492 100644 --- a/lib/libcxx/include/__filesystem/copy_options.h +++ b/lib/libcxx/include/__filesystem/copy_options.h @@ -34,19 +34,19 @@ enum class copy_options : unsigned short { __in_recursive_copy = 512, }; -_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator&(copy_options __lhs, copy_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator&(copy_options __lhs, copy_options __rhs) { return static_cast(static_cast(__lhs) & static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator|(copy_options __lhs, copy_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator|(copy_options __lhs, copy_options __rhs) { return static_cast(static_cast(__lhs) | static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator^(copy_options __lhs, copy_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator^(copy_options __lhs, copy_options __rhs) { return static_cast(static_cast(__lhs) ^ static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator~(copy_options __lhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
inline constexpr copy_options operator~(copy_options __lhs) { return static_cast(~static_cast(__lhs)); } diff --git a/lib/libcxx/include/__filesystem/directory_entry.h b/lib/libcxx/include/__filesystem/directory_entry.h index 5f236cf264..fab400b439 100644 --- a/lib/libcxx/include/__filesystem/directory_entry.h +++ b/lib/libcxx/include/__filesystem/directory_entry.h @@ -40,8 +40,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH - class directory_entry { typedef filesystem::path _Path; @@ -89,80 +87,88 @@ public: _LIBCPP_HIDE_FROM_ABI void refresh(error_code& __ec) noexcept { __refresh(&__ec); } - _LIBCPP_HIDE_FROM_ABI _Path const& path() const noexcept { return __p_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Path const& path() const noexcept { return __p_; } _LIBCPP_HIDE_FROM_ABI operator const _Path&() const noexcept { return __p_; } - _LIBCPP_HIDE_FROM_ABI bool exists() const { return filesystem::exists(file_status{__get_ft()}); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool exists() const { return filesystem::exists(file_status{__get_ft()}); } - _LIBCPP_HIDE_FROM_ABI bool exists(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool exists(error_code& __ec) const noexcept { return filesystem::exists(file_status{__get_ft(&__ec)}); } - _LIBCPP_HIDE_FROM_ABI bool is_block_file() const { return __get_ft() == file_type::block; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_block_file() const { return __get_ft() == file_type::block; } - _LIBCPP_HIDE_FROM_ABI bool is_block_file(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_block_file(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::block; } - _LIBCPP_HIDE_FROM_ABI bool is_character_file() const { return __get_ft() == file_type::character; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_character_file() const { return __get_ft() == file_type::character; } - _LIBCPP_HIDE_FROM_ABI bool 
is_character_file(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_character_file(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::character; } - _LIBCPP_HIDE_FROM_ABI bool is_directory() const { return __get_ft() == file_type::directory; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_directory() const { return __get_ft() == file_type::directory; } - _LIBCPP_HIDE_FROM_ABI bool is_directory(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_directory(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::directory; } - _LIBCPP_HIDE_FROM_ABI bool is_fifo() const { return __get_ft() == file_type::fifo; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_fifo() const { return __get_ft() == file_type::fifo; } - _LIBCPP_HIDE_FROM_ABI bool is_fifo(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::fifo; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_fifo(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::fifo; + } - _LIBCPP_HIDE_FROM_ABI bool is_other() const { return filesystem::is_other(file_status{__get_ft()}); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_other() const { return filesystem::is_other(file_status{__get_ft()}); } - _LIBCPP_HIDE_FROM_ABI bool is_other(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_other(error_code& __ec) const noexcept { return filesystem::is_other(file_status{__get_ft(&__ec)}); } - _LIBCPP_HIDE_FROM_ABI bool is_regular_file() const { return __get_ft() == file_type::regular; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_regular_file() const { return __get_ft() == file_type::regular; } - _LIBCPP_HIDE_FROM_ABI bool is_regular_file(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_regular_file(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::regular; } - _LIBCPP_HIDE_FROM_ABI bool is_socket() const { return 
__get_ft() == file_type::socket; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_socket() const { return __get_ft() == file_type::socket; } - _LIBCPP_HIDE_FROM_ABI bool is_socket(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::socket; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_socket(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::socket; + } - _LIBCPP_HIDE_FROM_ABI bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } - _LIBCPP_HIDE_FROM_ABI bool is_symlink(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_symlink(error_code& __ec) const noexcept { return __get_sym_ft(&__ec) == file_type::symlink; } - _LIBCPP_HIDE_FROM_ABI uintmax_t file_size() const { return __get_size(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t file_size() const { return __get_size(); } - _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(error_code& __ec) const noexcept { return __get_size(&__ec); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(error_code& __ec) const noexcept { return __get_size(&__ec); } - _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count() const { return __get_nlink(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count() const { return __get_nlink(); } - _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(error_code& __ec) const noexcept { return __get_nlink(&__ec); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(error_code& __ec) const noexcept { + return __get_nlink(&__ec); + } - _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time() const { return __get_write_time(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time() const { return __get_write_time(); } - _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_time_type 
last_write_time(error_code& __ec) const noexcept { return __get_write_time(&__ec); } - _LIBCPP_HIDE_FROM_ABI file_status status() const { return __get_status(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status status() const { return __get_status(); } - _LIBCPP_HIDE_FROM_ABI file_status status(error_code& __ec) const noexcept { return __get_status(&__ec); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status status(error_code& __ec) const noexcept { + return __get_status(&__ec); + } - _LIBCPP_HIDE_FROM_ABI file_status symlink_status() const { return __get_symlink_status(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status symlink_status() const { return __get_symlink_status(); } - _LIBCPP_HIDE_FROM_ABI file_status symlink_status(error_code& __ec) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status symlink_status(error_code& __ec) const noexcept { return __get_symlink_status(&__ec); } @@ -459,8 +465,6 @@ private: directory_entry __elem_; }; -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP - _LIBCPP_END_NAMESPACE_FILESYSTEM #endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM diff --git a/lib/libcxx/include/__filesystem/directory_iterator.h b/lib/libcxx/include/__filesystem/directory_iterator.h index f5085b39eb..b62129807b 100644 --- a/lib/libcxx/include/__filesystem/directory_iterator.h +++ b/lib/libcxx/include/__filesystem/directory_iterator.h @@ -34,8 +34,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH - class _LIBCPP_HIDDEN __dir_stream; class directory_iterator { public: @@ -73,7 +71,7 @@ public: _LIBCPP_HIDE_FROM_ABI ~directory_iterator() = default; - _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { // Note: this check duplicates a check in `__dereference()`. 
_LIBCPP_ASSERT_NON_NULL(__imp_, "The end iterator cannot be dereferenced"); return __dereference(); @@ -123,23 +121,23 @@ operator!=(const directory_iterator& __lhs, const directory_iterator& __rhs) noe } // enable directory_iterator range-based for statements -inline _LIBCPP_HIDE_FROM_ABI directory_iterator begin(directory_iterator __iter) noexcept { return __iter; } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI directory_iterator begin(directory_iterator __iter) noexcept { + return __iter; +} -inline _LIBCPP_HIDE_FROM_ABI directory_iterator end(directory_iterator) noexcept { return directory_iterator(); } - -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI directory_iterator end(directory_iterator) noexcept { + return directory_iterator(); +} _LIBCPP_END_NAMESPACE_FILESYSTEM # if _LIBCPP_STD_VER >= 20 template <> -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool - std::ranges::enable_borrowed_range = true; +inline constexpr bool std::ranges::enable_borrowed_range = true; template <> -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool - std::ranges::enable_view = true; +inline constexpr bool std::ranges::enable_view = true; # endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__filesystem/directory_options.h b/lib/libcxx/include/__filesystem/directory_options.h index d0cd3ebfda..11c7d204ea 100644 --- a/lib/libcxx/include/__filesystem/directory_options.h +++ b/lib/libcxx/include/__filesystem/directory_options.h @@ -22,19 +22,22 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM enum class directory_options : unsigned char { none = 0, follow_directory_symlink = 1, skip_permission_denied = 2 }; -_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator&(directory_options __lhs, directory_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options +operator&(directory_options __lhs, directory_options __rhs) { return static_cast(static_cast(__lhs) & static_cast(__rhs)); } 
-_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator|(directory_options __lhs, directory_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options +operator|(directory_options __lhs, directory_options __rhs) { return static_cast(static_cast(__lhs) | static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator^(directory_options __lhs, directory_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options +operator^(directory_options __lhs, directory_options __rhs) { return static_cast(static_cast(__lhs) ^ static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator~(directory_options __lhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator~(directory_options __lhs) { return static_cast(~static_cast(__lhs)); } diff --git a/lib/libcxx/include/__filesystem/file_status.h b/lib/libcxx/include/__filesystem/file_status.h index da316c8b02..746cd0f9a6 100644 --- a/lib/libcxx/include/__filesystem/file_status.h +++ b/lib/libcxx/include/__filesystem/file_status.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -class _LIBCPP_EXPORTED_FROM_ABI file_status { +class file_status { public: // constructors _LIBCPP_HIDE_FROM_ABI file_status() noexcept : file_status(file_type::none) {} @@ -38,9 +38,9 @@ public: _LIBCPP_HIDE_FROM_ABI file_status& operator=(file_status&&) noexcept = default; // observers - _LIBCPP_HIDE_FROM_ABI file_type type() const noexcept { return __ft_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_type type() const noexcept { return __ft_; } - _LIBCPP_HIDE_FROM_ABI perms permissions() const noexcept { return __prms_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI perms permissions() const noexcept { return __prms_; } // modifiers _LIBCPP_HIDE_FROM_ABI void type(file_type __ft) noexcept { __ft_ = __ft; } diff --git a/lib/libcxx/include/__filesystem/filesystem_error.h 
b/lib/libcxx/include/__filesystem/filesystem_error.h index 73592bba31..6f1daf866a 100644 --- a/lib/libcxx/include/__filesystem/filesystem_error.h +++ b/lib/libcxx/include/__filesystem/filesystem_error.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -class _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_EXPORTED_FROM_ABI filesystem_error : public system_error { +class _LIBCPP_EXPORTED_FROM_ABI filesystem_error : public system_error { public: _LIBCPP_HIDE_FROM_ABI filesystem_error(const string& __what, error_code __ec) : system_error(__ec, __what), __storage_(make_shared<_Storage>(path(), path())) { @@ -44,15 +44,16 @@ public: __create_what(2); } - _LIBCPP_HIDE_FROM_ABI const path& path1() const noexcept { return __storage_->__p1_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const path& path1() const noexcept { return __storage_->__p1_; } - _LIBCPP_HIDE_FROM_ABI const path& path2() const noexcept { return __storage_->__p2_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const path& path2() const noexcept { return __storage_->__p2_; } - _LIBCPP_HIDE_FROM_ABI filesystem_error(const filesystem_error&) = default; + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI filesystem_error(const filesystem_error&) = default; ~filesystem_error() override; // key function - _LIBCPP_HIDE_FROM_ABI_VIRTUAL - const char* what() const noexcept override { return __storage_->__what_.c_str(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { + return __storage_->__what_.c_str(); + } void __create_what(int __num_paths); @@ -69,14 +70,12 @@ private: # if _LIBCPP_HAS_EXCEPTIONS template -[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void -__throw_filesystem_error(_Args&&... __args) { +[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_filesystem_error(_Args&&... 
__args) { throw filesystem_error(std::forward<_Args>(__args)...); } # else template -[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void -__throw_filesystem_error(_Args&&...) { +[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_filesystem_error(_Args&&...) { _LIBCPP_VERBOSE_ABORT("filesystem_error was thrown in -fno-exceptions mode"); } # endif diff --git a/lib/libcxx/include/__filesystem/operations.h b/lib/libcxx/include/__filesystem/operations.h index 29b6c2f798..f536a1a9d4 100644 --- a/lib/libcxx/include/__filesystem/operations.h +++ b/lib/libcxx/include/__filesystem/operations.h @@ -31,8 +31,6 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH - _LIBCPP_EXPORTED_FROM_ABI path __absolute(const path&, error_code* __ec = nullptr); _LIBCPP_EXPORTED_FROM_ABI path __canonical(const path&, error_code* __ec = nullptr); _LIBCPP_EXPORTED_FROM_ABI bool @@ -70,10 +68,14 @@ _LIBCPP_EXPORTED_FROM_ABI bool __fs_is_empty(const path& __p, error_code* __ec = _LIBCPP_EXPORTED_FROM_ABI void __permissions(const path&, perms, perm_options, error_code* = nullptr); _LIBCPP_EXPORTED_FROM_ABI space_info __space(const path&, error_code* __ec = nullptr); -inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p) { return __absolute(__p); } -inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p, error_code& __ec) { return __absolute(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p) { return __canonical(__p); } -inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p, error_code& __ec) { return __canonical(__p, &__ec); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p) { return __absolute(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p, error_code& __ec) { + return __absolute(__p, &__ec); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p) { return __canonical(__p); } +[[nodiscard]] 
inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p, error_code& __ec) { + return __canonical(__p, &__ec); +} inline _LIBCPP_HIDE_FROM_ABI bool copy_file(const path& __from, const path& __to) { return __copy_file(__from, __to, copy_options::none); } @@ -137,85 +139,112 @@ inline _LIBCPP_HIDE_FROM_ABI void create_symlink(const path& __target, const pat inline _LIBCPP_HIDE_FROM_ABI void create_symlink(const path& __target, const path& __link, error_code& __ec) noexcept { return __create_symlink(__target, __link, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI path current_path() { return __current_path(); } -inline _LIBCPP_HIDE_FROM_ABI path current_path(error_code& __ec) { return __current_path(&__ec); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path current_path() { return __current_path(); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path current_path(error_code& __ec) { return __current_path(&__ec); } inline _LIBCPP_HIDE_FROM_ABI void current_path(const path& __p) { __current_path(__p); } inline _LIBCPP_HIDE_FROM_ABI void current_path(const path& __p, error_code& __ec) noexcept { __current_path(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI bool equivalent(const path& __p1, const path& __p2) { return __equivalent(__p1, __p2); } -inline _LIBCPP_HIDE_FROM_ABI bool equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool equivalent(const path& __p1, const path& __p2) { + return __equivalent(__p1, __p2); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool +equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { return __equivalent(__p1, __p2, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI bool status_known(file_status __s) noexcept { return __s.type() != file_type::none; } -inline _LIBCPP_HIDE_FROM_ABI bool exists(file_status __s) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool status_known(file_status __s) noexcept { + return __s.type() != file_type::none; +} +[[nodiscard]] 
inline _LIBCPP_HIDE_FROM_ABI bool exists(file_status __s) noexcept { return status_known(__s) && __s.type() != file_type::not_found; } -inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p) { return exists(__status(__p)); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p) { return exists(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p, error_code& __ec) noexcept { auto __s = __status(__p, &__ec); if (status_known(__s)) __ec.clear(); return exists(__s); } -inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p) { return __file_size(__p); } -inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p) { return __file_size(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p, error_code& __ec) noexcept { return __file_size(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p) { return __hard_link_count(__p); } -inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p) { return __hard_link_count(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p, error_code& __ec) noexcept { return __hard_link_count(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(file_status __s) noexcept { return __s.type() == file_type::block; } -inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p) { return is_block_file(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(file_status __s) noexcept { + return __s.type() == 
file_type::block; +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p) { return is_block_file(__status(__p)); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p, error_code& __ec) noexcept { return is_block_file(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(file_status __s) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(file_status __s) noexcept { return __s.type() == file_type::character; } -inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p) { return is_character_file(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p) { + return is_character_file(__status(__p)); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p, error_code& __ec) noexcept { return is_character_file(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_directory(file_status __s) noexcept { return __s.type() == file_type::directory; } -inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p) { return is_directory(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_directory(file_status __s) noexcept { + return __s.type() == file_type::directory; +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p) { return is_directory(__status(__p)); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p, error_code& __ec) noexcept { return is_directory(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p) { return __fs_is_empty(__p); } -inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p, error_code& __ec) { return __fs_is_empty(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI 
bool is_fifo(file_status __s) noexcept { return __s.type() == file_type::fifo; } -inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p) { return is_fifo(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p) { return __fs_is_empty(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p, error_code& __ec) { + return __fs_is_empty(__p, &__ec); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(file_status __s) noexcept { + return __s.type() == file_type::fifo; +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p) { return is_fifo(__status(__p)); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p, error_code& __ec) noexcept { return is_fifo(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(file_status __s) noexcept { return __s.type() == file_type::regular; } -inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p) { return is_regular_file(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(file_status __s) noexcept { + return __s.type() == file_type::regular; +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p) { + return is_regular_file(__status(__p)); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p, error_code& __ec) noexcept { return is_regular_file(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(file_status __s) noexcept { return __s.type() == file_type::symlink; } -inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p) { return is_symlink(__symlink_status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline 
_LIBCPP_HIDE_FROM_ABI bool is_symlink(file_status __s) noexcept { + return __s.type() == file_type::symlink; +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p) { + return is_symlink(__symlink_status(__p)); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p, error_code& __ec) noexcept { return is_symlink(__symlink_status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_other(file_status __s) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_other(file_status __s) noexcept { return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && !is_symlink(__s); } -inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p) { return is_other(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p) { return is_other(__status(__p)); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p, error_code& __ec) noexcept { return is_other(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_socket(file_status __s) noexcept { return __s.type() == file_type::socket; } -inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p) { return is_socket(__status(__p)); } -inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_socket(file_status __s) noexcept { + return __s.type() == file_type::socket; +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p) { return is_socket(__status(__p)); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p, error_code& __ec) noexcept { return is_socket(__status(__p, &__ec)); } -inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p) { return __last_write_time(__p); } -inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p, error_code& __ec) noexcept 
{ +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p) { + return __last_write_time(__p); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p, error_code& __ec) noexcept { return __last_write_time(__p, &__ec); } inline _LIBCPP_HIDE_FROM_ABI void last_write_time(const path& __p, file_time_type __t) { __last_write_time(__p, __t); } @@ -233,7 +262,7 @@ inline _LIBCPP_HIDE_FROM_ABI void permissions(const path& __p, perms __prms, per __permissions(__p, __prms, __opts, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base, error_code& __ec) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base, error_code& __ec) { path __tmp = __weakly_canonical(__p, &__ec); if (__ec) return {}; @@ -243,16 +272,18 @@ inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base, return __tmp.lexically_proximate(__tmp_base); } -inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, error_code& __ec) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, error_code& __ec) { return proximate(__p, current_path(), __ec); } -inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base = current_path()) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base = current_path()) { return __weakly_canonical(__p).lexically_proximate(__weakly_canonical(__base)); } -inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p) { return __read_symlink(__p); } -inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p, error_code& __ec) { return __read_symlink(__p, &__ec); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p) { return __read_symlink(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p, error_code& __ec) { + return __read_symlink(__p, &__ec); +} -inline 
_LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base, error_code& __ec) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base, error_code& __ec) { path __tmp = __weakly_canonical(__p, &__ec); if (__ec) return path(); @@ -262,10 +293,10 @@ inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base, return __tmp.lexically_relative(__tmpbase); } -inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, error_code& __ec) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, error_code& __ec) { return relative(__p, current_path(), __ec); } -inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base = current_path()) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base = current_path()) { return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); } inline _LIBCPP_HIDE_FROM_ABI uintmax_t remove_all(const path& __p) { return __remove_all(__p); } @@ -282,27 +313,27 @@ inline _LIBCPP_HIDE_FROM_ABI void resize_file(const path& __p, uintmax_t __ns) { inline _LIBCPP_HIDE_FROM_ABI void resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { return __resize_file(__p, __ns, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p) { return __space(__p); } -inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p) { return __space(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p, error_code& __ec) noexcept { return __space(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p) { return __status(__p); } -inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p) { return 
__status(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p, error_code& __ec) noexcept { return __status(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p) { return __symlink_status(__p); } -inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p, error_code& __ec) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p) { return __symlink_status(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p, error_code& __ec) noexcept { return __symlink_status(__p, &__ec); } -inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path() { return __temp_directory_path(); } -inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path(error_code& __ec) { return __temp_directory_path(&__ec); } -inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p) { return __weakly_canonical(__p); } -inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p, error_code& __ec) { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path() { return __temp_directory_path(); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path(error_code& __ec) { + return __temp_directory_path(&__ec); +} +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p) { return __weakly_canonical(__p); } +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p, error_code& __ec) { return __weakly_canonical(__p, &__ec); } -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP - _LIBCPP_END_NAMESPACE_FILESYSTEM #endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM diff --git a/lib/libcxx/include/__filesystem/path.h b/lib/libcxx/include/__filesystem/path.h index 381e5678a5..4fd3acad4d 100644 --- a/lib/libcxx/include/__filesystem/path.h +++ b/lib/libcxx/include/__filesystem/path.h @@ -42,8 +42,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH - template struct __can_convert_char { static const bool value = false; @@ -326,6 +324,7 @@ struct _PathCVT { } }; +# if _LIBCPP_HAS_LOCALIZATION template struct _PathExport { typedef __narrow_to_utf8 _Narrower; @@ -366,7 +365,7 @@ struct _PathExport { } }; -# if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T template <> struct _PathExport { typedef __narrow_to_utf8 _Narrower; @@ -376,8 +375,9 @@ struct _PathExport { _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); } }; -# endif // _LIBCPP_HAS_CHAR8_T -# endif /* _LIBCPP_WIN32API */ +# endif // _LIBCPP_HAS_CHAR8_T +# endif // _LIBCPP_HAS_LOCALIZATION +# endif // _LIBCPP_WIN32API class _LIBCPP_EXPORTED_FROM_ABI path { template @@ -667,16 +667,16 @@ public: _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __s) { __pn_.reserve(__s); } // native format observers - _LIBCPP_HIDE_FROM_ABI const string_type& native() const noexcept { return __pn_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const string_type& native() const noexcept { return __pn_; } - _LIBCPP_HIDE_FROM_ABI const value_type* c_str() const noexcept { return __pn_.c_str(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const value_type* c_str() const noexcept { return __pn_.c_str(); } _LIBCPP_HIDE_FROM_ABI operator string_type() const { return __pn_; } # if defined(_LIBCPP_WIN32API) - _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return __pn_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return __pn_; } - _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { std::wstring __s; __s.resize(__pn_.size()); std::replace_copy(__pn_.begin(), __pn_.end(), __s.begin(), '\\', '/'); @@ -685,6 +685,7 @@ public: # if _LIBCPP_HAS_LOCALIZATION template , class _Allocator = allocator<_ECharT> > + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a 
= _Allocator()) const { using _Str = basic_string<_ECharT, _Traits, _Allocator>; _Str __s(__a); @@ -693,8 +694,8 @@ public: return __s; } - _LIBCPP_HIDE_FROM_ABI std::string string() const { return string(); } - _LIBCPP_HIDE_FROM_ABI __u8_string u8string() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __u8_string u8string() const { using _CVT = __narrow_to_utf8; __u8_string __s; __s.reserve(__pn_.size()); @@ -702,12 +703,12 @@ public: return __s; } - _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string(); } - _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string(); } // generic format observers template , class _Allocator = allocator<_ECharT> > - _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> generic_string(const _Allocator& __a = _Allocator()) const { using _Str = basic_string<_ECharT, _Traits, _Allocator>; _Str __s = string<_ECharT, _Traits, _Allocator>(__a); @@ -718,10 +719,10 @@ public: return __s; } - _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return generic_string(); } - _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return generic_string(); } - _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return generic_string(); } - _LIBCPP_HIDE_FROM_ABI __u8_string generic_u8string() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return generic_string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return generic_string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return generic_string(); } + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI __u8_string generic_u8string() const { __u8_string __s = u8string(); std::replace(__s.begin(), __s.end(), '\\', '/'); return __s; @@ -729,15 +730,18 @@ public: # endif // _LIBCPP_HAS_LOCALIZATION # else /* _LIBCPP_WIN32API */ - _LIBCPP_HIDE_FROM_ABI std::string string() const { return __pn_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string string() const { return __pn_; } # if _LIBCPP_HAS_CHAR8_T - _LIBCPP_HIDE_FROM_ABI std::u8string u8string() const { return std::u8string(__pn_.begin(), __pn_.end()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u8string u8string() const { + return std::u8string(__pn_.begin(), __pn_.end()); + } # else - _LIBCPP_HIDE_FROM_ABI std::string u8string() const { return __pn_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string u8string() const { return __pn_; } # endif # if _LIBCPP_HAS_LOCALIZATION template , class _Allocator = allocator<_ECharT> > + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const { using _CVT = __widen_from_utf8; using _Str = basic_string<_ECharT, _Traits, _Allocator>; @@ -748,32 +752,34 @@ public: } # if _LIBCPP_HAS_WIDE_CHARACTERS - _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return string(); } # endif - _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string(); } - _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string(); } # endif // _LIBCPP_HAS_LOCALIZATION // generic format observers - _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return __pn_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return __pn_; } # if _LIBCPP_HAS_CHAR8_T - _LIBCPP_HIDE_FROM_ABI 
std::u8string generic_u8string() const { return std::u8string(__pn_.begin(), __pn_.end()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u8string generic_u8string() const { + return std::u8string(__pn_.begin(), __pn_.end()); + } # else - _LIBCPP_HIDE_FROM_ABI std::string generic_u8string() const { return __pn_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string generic_u8string() const { return __pn_; } # endif # if _LIBCPP_HAS_LOCALIZATION template , class _Allocator = allocator<_ECharT> > - _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> generic_string(const _Allocator& __a = _Allocator()) const { return string<_ECharT, _Traits, _Allocator>(__a); } # if _LIBCPP_HAS_WIDE_CHARACTERS - _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { return string(); } # endif - _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return string(); } - _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return string(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return string(); } # endif // _LIBCPP_HAS_LOCALIZATION # endif /* !_LIBCPP_WIN32API */ @@ -790,40 +796,40 @@ private: public: // compare - _LIBCPP_HIDE_FROM_ABI int compare(const path& __p) const noexcept { return __compare(__p.__pn_); } - _LIBCPP_HIDE_FROM_ABI int compare(const string_type& __s) const { return __compare(__s); } - _LIBCPP_HIDE_FROM_ABI int compare(__string_view __s) const { return __compare(__s); } - _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return __compare(__s); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(const path& __p) const noexcept { return __compare(__p.__pn_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int 
compare(const string_type& __s) const { return __compare(__s); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(__string_view __s) const { return __compare(__s); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return __compare(__s); } // decomposition - _LIBCPP_HIDE_FROM_ABI path root_name() const { return string_type(__root_name()); } - _LIBCPP_HIDE_FROM_ABI path root_directory() const { return string_type(__root_directory()); } - _LIBCPP_HIDE_FROM_ABI path root_path() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path root_name() const { return string_type(__root_name()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path root_directory() const { return string_type(__root_directory()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path root_path() const { # if defined(_LIBCPP_WIN32API) return string_type(__root_path_raw()); # else return root_name().append(string_type(__root_directory())); # endif } - _LIBCPP_HIDE_FROM_ABI path relative_path() const { return string_type(__relative_path()); } - _LIBCPP_HIDE_FROM_ABI path parent_path() const { return string_type(__parent_path()); } - _LIBCPP_HIDE_FROM_ABI path filename() const { return string_type(__filename()); } - _LIBCPP_HIDE_FROM_ABI path stem() const { return string_type(__stem()); } - _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path relative_path() const { return string_type(__relative_path()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path parent_path() const { return string_type(__parent_path()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path filename() const { return string_type(__filename()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path stem() const { return string_type(__stem()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); } // query [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); } - _LIBCPP_HIDE_FROM_ABI 
bool has_root_name() const { return !__root_name().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_root_path() const { return !__root_path_raw().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_relative_path() const { return !__relative_path().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_parent_path() const { return !__parent_path().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_filename() const { return !__filename().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_stem() const { return !__stem().empty(); } - _LIBCPP_HIDE_FROM_ABI bool has_extension() const { return !__extension().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_root_path() const { return !__root_path_raw().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_relative_path() const { return !__relative_path().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_parent_path() const { return !__parent_path().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_filename() const { return !__filename().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_stem() const { return !__stem().empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_extension() const { return !__extension().empty(); } - _LIBCPP_HIDE_FROM_ABI bool is_absolute() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_absolute() const { # if defined(_LIBCPP_WIN32API) __string_view __root_name_str = __root_name(); __string_view __root_dir = __root_directory(); @@ -847,13 +853,13 @@ public: return has_root_directory(); # endif } - _LIBCPP_HIDE_FROM_ABI bool is_relative() const { return !is_absolute(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_relative() const { return !is_absolute(); } // relative paths - path 
lexically_normal() const; - path lexically_relative(const path& __base) const; + [[nodiscard]] path lexically_normal() const; + [[nodiscard]] path lexically_relative(const path& __base) const; - _LIBCPP_HIDE_FROM_ABI path lexically_proximate(const path& __base) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path lexically_proximate(const path& __base) const { path __result = this->lexically_relative(__base); if (__result.native().empty()) return *this; @@ -861,11 +867,11 @@ public: } // iterators - class _LIBCPP_EXPORTED_FROM_ABI iterator; + class iterator; typedef iterator const_iterator; - iterator begin() const; - iterator end() const; + [[nodiscard]] iterator begin() const; + [[nodiscard]] iterator end() const; # if _LIBCPP_HAS_LOCALIZATION template < @@ -908,17 +914,15 @@ private: inline _LIBCPP_HIDE_FROM_ABI void swap(path& __lhs, path& __rhs) noexcept { __lhs.swap(__rhs); } -_LIBCPP_EXPORTED_FROM_ABI size_t hash_value(const path& __p) noexcept; - -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP +[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI size_t hash_value(const path& __p) noexcept; _LIBCPP_END_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD template <> -struct _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY hash : __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(filesystem::path const& __p) const noexcept { +struct hash : __unary_function { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t operator()(filesystem::path const& __p) const noexcept { return filesystem::hash_value(__p); } }; diff --git a/lib/libcxx/include/__filesystem/path_iterator.h b/lib/libcxx/include/__filesystem/path_iterator.h index e0f601662d..dd408a76ca 100644 --- a/lib/libcxx/include/__filesystem/path_iterator.h +++ b/lib/libcxx/include/__filesystem/path_iterator.h @@ -52,7 +52,7 @@ public: _LIBCPP_HIDE_FROM_ABI iterator& operator=(const iterator&) = default; - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __stashed_elem_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI reference 
operator*() const { return __stashed_elem_; } _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return &__stashed_elem_; } @@ -95,12 +95,10 @@ private: _ParserState __state_; }; -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline _LIBCPP_HIDE_FROM_ABI bool operator==(const path::iterator& __lhs, const path::iterator& __rhs) { return __lhs.__path_ptr_ == __rhs.__path_ptr_ && __lhs.__entry_.data() == __rhs.__entry_.data(); } -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const path::iterator& __lhs, const path::iterator& __rhs) { return !(__lhs == __rhs); } diff --git a/lib/libcxx/include/__filesystem/perm_options.h b/lib/libcxx/include/__filesystem/perm_options.h index 64c16ee60a..a2ab733eb2 100644 --- a/lib/libcxx/include/__filesystem/perm_options.h +++ b/lib/libcxx/include/__filesystem/perm_options.h @@ -22,19 +22,19 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM enum class perm_options : unsigned char { replace = 1, add = 2, remove = 4, nofollow = 8 }; -_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator&(perm_options __lhs, perm_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator&(perm_options __lhs, perm_options __rhs) { return static_cast(static_cast(__lhs) & static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator|(perm_options __lhs, perm_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator|(perm_options __lhs, perm_options __rhs) { return static_cast(static_cast(__lhs) | static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator^(perm_options __lhs, perm_options __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator^(perm_options __lhs, perm_options __rhs) { return static_cast(static_cast(__lhs) ^ static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator~(perm_options __lhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr 
perm_options operator~(perm_options __lhs) { return static_cast(~static_cast(__lhs)); } diff --git a/lib/libcxx/include/__filesystem/perms.h b/lib/libcxx/include/__filesystem/perms.h index 458f1e6e53..042f249e12 100644 --- a/lib/libcxx/include/__filesystem/perms.h +++ b/lib/libcxx/include/__filesystem/perms.h @@ -51,19 +51,19 @@ enum class perms : unsigned { unknown = 0xFFFF, }; -_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator&(perms __lhs, perms __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator&(perms __lhs, perms __rhs) { return static_cast(static_cast(__lhs) & static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator|(perms __lhs, perms __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator|(perms __lhs, perms __rhs) { return static_cast(static_cast(__lhs) | static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator^(perms __lhs, perms __rhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator^(perms __lhs, perms __rhs) { return static_cast(static_cast(__lhs) ^ static_cast(__rhs)); } -_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator~(perms __lhs) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator~(perms __lhs) { return static_cast(~static_cast(__lhs)); } diff --git a/lib/libcxx/include/__filesystem/recursive_directory_iterator.h b/lib/libcxx/include/__filesystem/recursive_directory_iterator.h index ad01a9982b..18165b0031 100644 --- a/lib/libcxx/include/__filesystem/recursive_directory_iterator.h +++ b/lib/libcxx/include/__filesystem/recursive_directory_iterator.h @@ -33,8 +33,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH - class recursive_directory_iterator { public: using value_type = directory_entry; @@ -73,7 +71,7 @@ public: _LIBCPP_HIDE_FROM_ABI ~recursive_directory_iterator() = default; - _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { return 
__dereference(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { return __dereference(); } _LIBCPP_HIDE_FROM_ABI const directory_entry* operator->() const { return &__dereference(); } @@ -87,14 +85,14 @@ public: _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } - _LIBCPP_EXPORTED_FROM_ABI directory_options options() const; - _LIBCPP_EXPORTED_FROM_ABI int depth() const; + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI directory_options options() const; + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI int depth() const; _LIBCPP_HIDE_FROM_ABI void pop() { __pop(); } _LIBCPP_HIDE_FROM_ABI void pop(error_code& __ec) { __pop(&__ec); } - _LIBCPP_HIDE_FROM_ABI bool recursion_pending() const { return __rec_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool recursion_pending() const { return __rec_; } _LIBCPP_HIDE_FROM_ABI void disable_recursion_pending() { __rec_ = false; } @@ -132,27 +130,24 @@ operator!=(const recursive_directory_iterator& __lhs, const recursive_directory_ return !(__lhs == __rhs); } // enable recursive_directory_iterator range-based for statements -inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator begin(recursive_directory_iterator __iter) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator +begin(recursive_directory_iterator __iter) noexcept { return __iter; } -inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator end(recursive_directory_iterator) noexcept { +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator end(recursive_directory_iterator) noexcept { return recursive_directory_iterator(); } -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP - _LIBCPP_END_NAMESPACE_FILESYSTEM # if _LIBCPP_STD_VER >= 20 template <> -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool - std::ranges::enable_borrowed_range = true; +inline constexpr bool std::ranges::enable_borrowed_range = true; template <> 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool - std::ranges::enable_view = true; +inline constexpr bool std::ranges::enable_view = true; # endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__filesystem/space_info.h b/lib/libcxx/include/__filesystem/space_info.h index 3fa57d3309..28f7ced40d 100644 --- a/lib/libcxx/include/__filesystem/space_info.h +++ b/lib/libcxx/include/__filesystem/space_info.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -struct _LIBCPP_EXPORTED_FROM_ABI space_info { +struct space_info { uintmax_t capacity; uintmax_t free; uintmax_t available; diff --git a/lib/libcxx/include/__filesystem/u8path.h b/lib/libcxx/include/__filesystem/u8path.h index a701425e42..aabd2bbd3c 100644 --- a/lib/libcxx/include/__filesystem/u8path.h +++ b/lib/libcxx/include/__filesystem/u8path.h @@ -24,32 +24,32 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH - +# if !defined(_LIBCPP_WIN32API) || _LIBCPP_HAS_LOCALIZATION template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _InputIt __l) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _InputIt __l) { static_assert( -# if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || -# endif +# endif is_same::__char_type, char>::value, "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" " or 'char8_t'"); -# if defined(_LIBCPP_WIN32API) +# if defined(_LIBCPP_WIN32API) string __tmp(__f, __l); using _CVT = __widen_from_utf8; std::wstring __w; __w.reserve(__tmp.size()); _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); return path(__w); -# else +# else return path(__f, __l); -# endif /* !_LIBCPP_WIN32API */ +# endif // defined(_LIBCPP_WIN32API) } +# endif // !defined(_LIBCPP_WIN32API) || _LIBCPP_HAS_LOCALIZATION -# if defined(_LIBCPP_WIN32API) +# if defined(_LIBCPP_WIN32API) && 
_LIBCPP_HAS_LOCALIZATION template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _NullSentinel) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _NullSentinel) { static_assert( # if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || @@ -67,10 +67,10 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); return path(__w); } -# endif /* _LIBCPP_WIN32API */ +# endif // defined(_LIBCPP_WIN32API) && _LIBCPP_HAS_LOCALIZATION template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& __s) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& __s) { static_assert( # if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || @@ -86,8 +86,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& # endif } -_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP - _LIBCPP_END_NAMESPACE_FILESYSTEM #endif // _LIBCPP_STD_VER >= 17 diff --git a/lib/libcxx/include/__flat_map/flat_map.h b/lib/libcxx/include/__flat_map/flat_map.h index bf193f6d3c..50487cada2 100644 --- a/lib/libcxx/include/__flat_map/flat_map.h +++ b/lib/libcxx/include/__flat_map/flat_map.h @@ -29,7 +29,6 @@ #include <__flat_map/key_value_iterator.h> #include <__flat_map/sorted_unique.h> #include <__flat_map/utils.h> -#include <__functional/invoke.h> #include <__functional/is_transparent.h> #include <__functional/operations.h> #include <__fwd/memory.h> @@ -48,7 +47,7 @@ #include <__ranges/container_compatible_range.h> #include <__ranges/drop_view.h> #include <__ranges/from_range.h> -#include <__ranges/ref_view.h> +#include <__ranges/range_adaptor.h> #include <__ranges/size.h> #include <__ranges/subrange.h> #include <__ranges/zip_view.h> @@ -410,41 +409,45 @@ public: } // iterators - 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { return iterator(__containers_.keys.begin(), __containers_.values.begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { return const_iterator(__containers_.keys.begin(), __containers_.values.begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { return iterator(__containers_.keys.end(), __containers_.values.end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { return const_iterator(__containers_.keys.end(), __containers_.values.end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { 
return const_reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { return begin(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { return end(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { + return begin(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { + return end(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } @@ -453,11 +456,11 @@ public: return __containers_.keys.empty(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { return __containers_.keys.size(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { return std::min(__containers_.keys.max_size(), __containers_.values.max_size()); } @@ -481,7 +484,7 @@ public: return try_emplace(std::forward<_Kp>(__x)).first->second; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const key_type& __x) { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const 
key_type&): Key does not exist"); @@ -489,7 +492,7 @@ public: return __it->second; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const key_type& __x) const { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const key_type&) const: Key does not exist"); @@ -499,7 +502,7 @@ public: template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const _Kp& __x) { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const K&): Key does not exist"); @@ -509,7 +512,7 @@ public: template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const _Kp& __x) const { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const K&) const: Key does not exist"); @@ -589,6 +592,15 @@ public: __append_sort_merge_unique(ranges::begin(__range), ranges::end(__range)); } + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(sorted_unique_t, _Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge_unique(ranges::begin(__range), ranges::end(__range)); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } @@ -597,7 +609,7 @@ public: insert(sorted_unique, __il.begin(), __il.end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && { auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); auto __ret = std::move(__containers_); return __ret; @@ -738,14 +750,17 @@ public: return iterator(std::move(__key_it), std::move(__mapped_it)); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_map& __y) noexcept { - // warning: The spec has unconditional noexcept, which means that - // if any of the following functions throw an exception, - // std::terminate will be called. - // This is discussed in P2767, which hasn't been voted on yet. + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + swap(flat_map& __y) noexcept(is_nothrow_swappable_v && + is_nothrow_swappable_v && is_nothrow_swappable_v) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { + clear() /* noexcept */; + __y.clear() /* noexcept */; + }); ranges::swap(__compare_, __y.__compare_); ranges::swap(__containers_.keys, __y.__containers_.keys); ranges::swap(__containers_.values, __y.__containers_.values); + __on_failure.__complete(); } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { @@ -754,116 +769,121 @@ public: } // observers - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { return value_compare(__compare_); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept { return __containers_.keys; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const 
mapped_container_type& values() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_container_type& + values() const noexcept { return __containers_.values; } // map operations - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { return __find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { return __find_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { return __find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { return contains(__x) ? 1 : 0; } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { return contains(__x) ? 
1 : 0; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { return find(__x) != end(); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { return find(__x) != end(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { return __lower_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + lower_bound(const key_type& __x) const { return __lower_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { return __lower_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { return __lower_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { return __upper_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const key_type& __x) const { + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + upper_bound(const key_type& __x) const { return __upper_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { return __upper_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { return __upper_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) { return __equal_range_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __x) const { return __equal_range_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) { return __equal_range_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _Kp& __x) const { return __equal_range_impl(*this, __x); } @@ -878,7 +898,8 @@ public: __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } - friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_map& __x, flat_map& __y) noexcept { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + 
swap(flat_map& __x, flat_map& __y) noexcept(noexcept(__x.swap(__y))) { __x.swap(__y); } @@ -913,7 +934,7 @@ private: __compare_(std::forward<_CompArg>(__comp)...) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted_and_unique(auto&& __key_container) const { - auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); }; + auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) -> bool { return !__compare_(__x, __y); }; return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container); } @@ -946,7 +967,7 @@ private: auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); auto __append_start_offset = __containers_.keys.size() - __num_of_appended; auto __end = __zv.end(); - auto __compare_key = [this](const auto& __p1, const auto& __p2) { + auto __compare_key = [this](const auto& __p1, const auto& __p2) -> bool { return __compare_(std::get<0>(__p1), std::get<0>(__p2)); }; if constexpr (!_WasSorted) { @@ -1125,8 +1146,7 @@ private: }; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && is_invocable_v) @@ -1139,7 +1159,7 @@ flat_map(_KeyContainer, _MappedContainer, _Compare = _Compare()) template requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && - !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) + !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>) flat_map(_KeyContainer, _MappedContainer, _Allocator) -> flat_map; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && - 
uses_allocator_v<_MappedContainer, _Allocator> && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && + uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && is_invocable_v) @@ -1162,8 +1181,7 @@ flat_map(_KeyContainer, _MappedContainer, _Compare, _Allocator) _MappedContainer>; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && is_invocable_v) @@ -1176,7 +1194,7 @@ flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare = _Compare() template requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && - !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) + !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>) flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Allocator) -> flat_map; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && - uses_allocator_v<_MappedContainer, _Allocator> && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && + uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && is_invocable_v) @@ -1199,19 +1216,19 @@ flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare, _Allocator) _MappedContainer>; template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_map(_InputIterator, _InputIterator, _Compare = _Compare()) 
-> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_map(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare()) -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; template >, class _Allocator = allocator, - class = __enable_if_t::value && __is_allocator<_Allocator>::value>> + class = __enable_if_t && __is_allocator_v<_Allocator>>> flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_map< __range_key_type<_Range>, __range_mapped_type<_Range>, @@ -1219,7 +1236,7 @@ flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator( vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; -template ::value>> +template >> flat_map(from_range_t, _Range&&, _Allocator) -> flat_map< __range_key_type<_Range>, __range_mapped_type<_Range>, @@ -1228,11 +1245,11 @@ flat_map(from_range_t, _Range&&, _Allocator) -> flat_map< vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_map(initializer_list>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_map(sorted_unique_t, initializer_list>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>; template diff --git a/lib/libcxx/include/__flat_map/flat_multimap.h b/lib/libcxx/include/__flat_map/flat_multimap.h index 0af6aac00c..72e3b5f216 100644 --- 
a/lib/libcxx/include/__flat_map/flat_multimap.h +++ b/lib/libcxx/include/__flat_map/flat_multimap.h @@ -22,7 +22,6 @@ #include <__algorithm/upper_bound.h> #include <__assert> #include <__compare/synth_three_way.h> -#include <__concepts/convertible_to.h> #include <__concepts/swappable.h> #include <__config> #include <__cstddef/byte.h> @@ -30,7 +29,6 @@ #include <__flat_map/key_value_iterator.h> #include <__flat_map/sorted_equivalent.h> #include <__flat_map/utils.h> -#include <__functional/invoke.h> #include <__functional/is_transparent.h> #include <__functional/operations.h> #include <__fwd/vector.h> @@ -47,7 +45,7 @@ #include <__ranges/container_compatible_range.h> #include <__ranges/drop_view.h> #include <__ranges/from_range.h> -#include <__ranges/ref_view.h> +#include <__ranges/range_adaptor.h> #include <__ranges/size.h> #include <__ranges/subrange.h> #include <__ranges/zip_view.h> @@ -57,14 +55,12 @@ #include <__type_traits/is_allocator.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_same.h> -#include <__type_traits/maybe_const.h> #include <__utility/exception_guard.h> #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/scope_guard.h> #include <__vector/vector.h> #include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -114,11 +110,12 @@ public: class value_compare { private: _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_; - _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare(key_compare __c) : __comp_(__c) {} friend flat_multimap; public: - _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const_reference __x, const_reference __y) const { return __comp_(__x.first, __y.first); } }; @@ -137,17 +134,17 @@ private: public: // [flat.map.cons], construct/copy/destroy - 
_LIBCPP_HIDE_FROM_ABI flat_multimap() noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap() noexcept( is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> && is_nothrow_default_constructible_v<_Compare>) : __containers_(), __compare_() {} - _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap&) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(const flat_multimap&) = default; // The copy/move constructors are not specified in the spec, which means they should be defaulted. // However, the move constructor can potentially leave a moved-from object in an inconsistent // state if an exception is thrown. - _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other) noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(flat_multimap&& __other) noexcept( is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>) # if _LIBCPP_HAS_EXCEPTIONS @@ -168,7 +165,8 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap& __other, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(const flat_multimap& __other, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __other.__containers_.keys, @@ -177,7 +175,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(flat_multimap&& __other, const _Allocator& __alloc) # if _LIBCPP_HAS_EXCEPTIONS try # endif // _LIBCPP_HAS_EXCEPTIONS @@ -194,7 +192,7 @@ public: # endif // _LIBCPP_HAS_EXCEPTIONS } - _LIBCPP_HIDE_FROM_ABI flat_multimap( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( 
key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare()) : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), @@ -204,7 +202,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), @@ -214,22 +212,22 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI - flat_multimap(const key_container_type& __key_cont, - const mapped_container_type& __mapped_cont, - const key_compare& __comp, - const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), "flat_multimap keys and mapped containers have different size"); __sort(); } - _LIBCPP_HIDE_FROM_ABI - flat_multimap(sorted_equivalent_t, - key_container_type __key_cont, - mapped_container_type __mapped_cont, - const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( + sorted_equivalent_t, + key_container_type __key_cont, + mapped_container_type __mapped_cont, + const key_compare& __comp = key_compare()) : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, 
__compare_(__comp) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), "flat_multimap keys and mapped containers have different size"); @@ -238,11 +236,11 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI - flat_multimap(sorted_equivalent_t, - const key_container_type& __key_cont, - const mapped_container_type& __mapped_cont, - const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( + sorted_equivalent_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), "flat_multimap keys and mapped containers have different size"); @@ -251,33 +249,35 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI - flat_multimap(sorted_equivalent_t, - const key_container_type& __key_cont, - const mapped_container_type& __mapped_cont, - const key_compare& __comp, - const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( + sorted_equivalent_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), "flat_multimap keys and mapped containers have different size"); _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted"); } - _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const key_compare& __comp) : __containers_(), __compare_(__comp) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multimap(const key_compare& __comp) + : 
__containers_(), __compare_(__comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(const key_compare& __comp, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(const key_compare& __comp, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multimap(const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {} template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) : __containers_(), __compare_(__comp) { insert(__first, __last); @@ -285,7 +285,7 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { insert(__first, __last); @@ -293,91 +293,99 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI flat_multimap(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) { insert(__first, __last); } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI 
flat_multimap(from_range_t __fr, _Range&& __rg) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(from_range_t __fr, _Range&& __rg) : flat_multimap(__fr, std::forward<_Range>(__rg), key_compare()) {} template <_ContainerCompatibleRange _Range, class _Allocator> requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(from_range_t, _Range&& __rg, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) { insert_range(std::forward<_Range>(__rg)); } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multimap(__comp) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp) + : flat_multimap(__comp) { insert_range(std::forward<_Range>(__rg)); } template <_ContainerCompatibleRange _Range, class _Allocator> requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { insert_range(std::forward<_Range>(__rg)); } template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI flat_multimap( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) : __containers_(), __compare_(__comp) { insert(sorted_equivalent, __first, __last); } template requires(__has_input_iterator_category<_InputIterator>::value && 
__allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI - flat_multimap(sorted_equivalent_t, - _InputIterator __first, - _InputIterator __last, - const key_compare& __comp, - const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( + sorted_equivalent_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { insert(sorted_equivalent, __first, __last); } template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) { insert(sorted_equivalent, __first, __last); } - _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list __il, const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(initializer_list __il, const key_compare& __comp = key_compare()) : flat_multimap(__il.begin(), __il.end(), __comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) : flat_multimap(__il.begin(), __il.end(), __comp, __alloc) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list __il, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(initializer_list __il, const _Allocator& __alloc) : flat_multimap(__il.begin(), __il.end(), __alloc) {} - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(sorted_equivalent_t, initializer_list __il, const key_compare& 
__comp = key_compare()) : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( sorted_equivalent_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(sorted_equivalent_t, initializer_list __il, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(sorted_equivalent_t, initializer_list __il, const _Allocator& __alloc) : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __alloc) {} - _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(initializer_list __il) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap& operator=(initializer_list __il) { clear(); insert(__il); return *this; @@ -386,9 +394,9 @@ public: // copy/move assignment are not specified in the spec (defaulted) // but move assignment can potentially leave moved from object in an inconsistent // state if an exception is thrown - _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(const flat_multimap&) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap& operator=(const flat_multimap&) = default; - _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(flat_multimap&& __other) noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap& operator=(flat_multimap&& __other) noexcept( is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> && is_nothrow_move_assignable_v<_Compare>) { auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); @@ -400,38 +408,58 @@ public: } // iterators - _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { return iterator(__containers_.keys.begin(), __containers_.values.begin()); } - _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { return const_iterator(__containers_.keys.begin(), __containers_.values.begin()); } - _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { return iterator(__containers_.keys.end(), __containers_.values.end()); } - _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { return const_iterator(__containers_.keys.end(), __containers_.values.end()); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { + return reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { + return reverse_iterator(begin()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } - _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } - 
_LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { + return begin(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { + return end(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(begin()); + } // [flat.map.capacity], capacity - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const noexcept { + return __containers_.keys.empty(); + } - _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { + return __containers_.keys.size(); + } - _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { return std::min(__containers_.keys.max_size(), __containers_.values.max_size()); } @@ -439,7 +467,7 @@ public: template requires is_constructible_v, _Args...> && is_move_constructible_v && is_move_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace(_Args&&... 
__args) { std::pair __pair(std::forward<_Args>(__args)...); auto __key_it = std::upper_bound(__containers_.keys.begin(), __containers_.keys.end(), __pair.first, __compare_); auto __mapped_it = __corresponding_mapped_it(*this, __key_it); @@ -450,7 +478,7 @@ public: template requires is_constructible_v, _Args...> - _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __hint, _Args&&... __args) { std::pair __pair(std::forward<_Args>(__args)...); auto __prev_larger = __hint != cbegin() && __compare_(__pair.first, (__hint - 1)->first); @@ -490,33 +518,35 @@ public: *this, __key_iter, __mapped_iter, std::move(__pair.first), std::move(__pair.second)); } - _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const value_type& __x) { return emplace(__x); } - _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(value_type&& __x) { + return emplace(std::move(__x)); + } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, const value_type& __x) { return emplace_hint(__hint, __x); } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, value_type&& __x) { return emplace_hint(__hint, std::move(__x)); } template requires is_constructible_v, _PairLike> - _LIBCPP_HIDE_FROM_ABI iterator insert(_PairLike&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(_PairLike&& __x) { return emplace(std::forward<_PairLike>(__x)); } template requires is_constructible_v, _PairLike> - 
_LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _PairLike&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, _PairLike&& __x) { return emplace_hint(__hint, std::forward<_PairLike>(__x)); } template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __first, _InputIterator __last) { if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { __reserve(__last - __first); } @@ -525,7 +555,8 @@ public: template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) { if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { __reserve(__last - __first); } @@ -534,7 +565,7 @@ public: } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) { if constexpr (ranges::sized_range<_Range>) { __reserve(ranges::size(__range)); } @@ -542,19 +573,32 @@ public: __append_sort_merge(ranges::begin(__range), ranges::end(__range)); } - _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(sorted_equivalent_t, _Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } - _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list __il) { + __append_sort_merge(ranges::begin(__range), ranges::end(__range)); + } + + _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { + insert(__il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + insert(sorted_equivalent_t, initializer_list __il) { insert(sorted_equivalent, __il.begin(), __il.end()); } - _LIBCPP_HIDE_FROM_ABI containers extract() && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && { auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); auto __ret = std::move(__containers_); return __ret; } - _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { _LIBCPP_ASSERT_VALID_INPUT_RANGE( __key_cont.size() == __mapped_cont.size(), "flat_multimap keys and mapped containers have different size"); @@ -565,15 +609,15 @@ public: __guard.__complete(); } - _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __position) { return __erase(__position.__key_iter_, __position.__mapped_iter_); } - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __position) { return __erase(__position.__key_iter_, __position.__mapped_iter_); } - _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __x) { auto [__first, __last] = equal_range(__x); auto __res = __last - __first; erase(__first, __last); @@ -583,14 +627,14 @@ public: template requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> && !is_convertible_v<_Kp &&, const_iterator>) - _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type 
erase(_Kp&& __x) { auto [__first, __last] = equal_range(__x); auto __res = __last - __first; erase(__first, __last); return __res; } - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __first, const_iterator __last) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); auto __key_it = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_); auto __mapped_it = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_); @@ -598,146 +642,178 @@ public: return iterator(std::move(__key_it), std::move(__mapped_it)); } - _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __y) noexcept { - // warning: The spec has unconditional noexcept, which means that - // if any of the following functions throw an exception, - // std::terminate will be called + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_multimap& __y) noexcept( + is_nothrow_swappable_v && is_nothrow_swappable_v && + is_nothrow_swappable_v) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { + clear() /* noexcept */; + __y.clear() /* noexcept */; + }); ranges::swap(__compare_, __y.__compare_); ranges::swap(__containers_.keys, __y.__containers_.keys); ranges::swap(__containers_.values, __y.__containers_.values); + __on_failure.__complete(); } - _LIBCPP_HIDE_FROM_ABI void clear() noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __containers_.keys.clear(); __containers_.values.clear(); } // observers - _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } - _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 
value_compare value_comp() const { + return value_compare(__compare_); + } - _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; } - _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept { + return __containers_.keys; + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_container_type& + values() const noexcept { + return __containers_.values; + } // map operations - _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { + return __find_impl(*this, __x); + } - _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } - - template - requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { return __find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { + template + requires __is_compare_transparent + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { auto [__first, __last] = equal_range(__x); return __last - __first; } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI size_type 
count(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { auto [__first, __last] = equal_range(__x); return __last - __first; } - _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } - - template - requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { return find(__x) != end(); } - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound(*this, __x); } + template + requires __is_compare_transparent + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { + return find(__x) != end(); + } - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { + return __lower_bound(*this, __x); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + lower_bound(const key_type& __x) const { return __lower_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { return __lower_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { return __lower_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound(*this, __x); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { + return 
__upper_bound(*this, __x); + } - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + upper_bound(const key_type& __x) const { return __upper_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { return __upper_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { return __upper_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) { return __equal_range_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) const { return __equal_range_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) { return __equal_range_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) const { return __equal_range_impl(*this, __x); } - friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multimap& __x, const flat_multimap& __y) { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator==(const flat_multimap& __x, const flat_multimap& __y) { return 
ranges::equal(__x, __y); } - friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multimap& __x, const flat_multimap& __y) { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 auto + operator<=>(const flat_multimap& __x, const flat_multimap& __y) { return std::lexicographical_compare_three_way( __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } - friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __x, flat_multimap& __y) noexcept { __x.swap(__y); } + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + swap(flat_multimap& __x, flat_multimap& __y) noexcept(noexcept(__x.swap(__y))) { + __x.swap(__y); + } private: struct __ctor_uses_allocator_tag { - explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default; + explicit _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __ctor_uses_allocator_tag() = default; }; struct __ctor_uses_allocator_empty_tag { - explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default; + explicit _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __ctor_uses_allocator_empty_tag() = default; }; template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI - flat_multimap(__ctor_uses_allocator_tag, - const _Allocator& __alloc, - _KeyCont&& __key_cont, - _MappedCont&& __mapped_cont, - _CompArg&&... __comp) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap( + __ctor_uses_allocator_tag, + const _Allocator& __alloc, + _KeyCont&& __key_cont, + _MappedCont&& __mapped_cont, + _CompArg&&... __comp) : __containers_{.keys = std::make_obj_using_allocator( __alloc, std::forward<_KeyCont>(__key_cont)), .values = std::make_obj_using_allocator( @@ -746,36 +822,39 @@ private: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_multimap(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... 
__comp) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multimap(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp) : __containers_{.keys = std::make_obj_using_allocator(__alloc), .values = std::make_obj_using_allocator(__alloc)}, __compare_(std::forward<_CompArg>(__comp)...) {} - _LIBCPP_HIDE_FROM_ABI bool __is_sorted(auto&& __key_container) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted(auto&& __key_container) const { return ranges::is_sorted(__key_container, __compare_); } - _LIBCPP_HIDE_FROM_ABI void __sort() { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __sort() { auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); ranges::sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); }); } template - _LIBCPP_HIDE_FROM_ABI static auto __corresponding_mapped_it(_Self&& __self, _KeyIter&& __key_iter) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto + __corresponding_mapped_it(_Self&& __self, _KeyIter&& __key_iter) { return __self.__containers_.values.begin() + static_cast>( ranges::distance(__self.__containers_.keys.begin(), __key_iter)); } template - _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_InputIterator __first, _Sentinel __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + __append_sort_merge(_InputIterator __first, _Sentinel __last) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); size_t __num_appended = __flat_map_utils::__append(*this, std::move(__first), std::move(__last)); if (__num_appended != 0) { auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); auto __append_start_offset = __containers_.keys.size() - __num_appended; auto __end = __zv.end(); - auto __compare_key = [this](const auto& __p1, const auto& __p2) { + auto __compare_key = [this](const auto& __p1, const auto& __p2) -> bool { return 
__compare_(std::get<0>(__p1), std::get<0>(__p2)); }; if constexpr (!_WasSorted) { @@ -791,7 +870,7 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __find_impl(_Self&& __self, const _Kp& __key) { auto __it = __self.lower_bound(__key); auto __last = __self.end(); if (__it == __last || __self.__compare_(__key, __it->first)) { @@ -801,7 +880,7 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { auto [__key_first, __key_last] = std::equal_range(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __key, __self.__compare_); @@ -811,7 +890,7 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static _Res __lower_bound(_Self&& __self, _Kp& __x) { auto __key_iter = std::lower_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_); auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); @@ -819,14 +898,14 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static _Res __upper_bound(_Self&& __self, _Kp& __x) { auto __key_iter = std::upper_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_); auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); return _Res(std::move(__key_iter), std::move(__mapped_iter)); } - _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __reserve(size_t __size) { if constexpr (__container_traits<_KeyContainer>::__reservable) { __containers_.keys.reserve(__size); 
} @@ -837,7 +916,8 @@ private: } template - _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); auto __key_iter = __containers_.keys.erase(__key_iter_to_remove); auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove); @@ -847,7 +927,8 @@ private: template friend typename flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type - erase_if(flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); + _LIBCPP_CONSTEXPR_SINCE_CXX26 + erase_if(flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); friend __flat_map_utils; @@ -855,8 +936,9 @@ private: _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; struct __key_equiv { - _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} - _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const_reference __x, const_reference __y) const { return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); } key_compare __comp_; @@ -864,8 +946,7 @@ private: }; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && is_invocable_v) @@ -878,7 +959,7 @@ flat_multimap(_KeyContainer, _MappedContainer, _Compare = _Compare()) template requires(uses_allocator_v<_KeyContainer, _Allocator> && 
uses_allocator_v<_MappedContainer, _Allocator> && - !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) + !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>) flat_multimap(_KeyContainer, _MappedContainer, _Allocator) -> flat_multimap; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && - uses_allocator_v<_MappedContainer, _Allocator> && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && + uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && is_invocable_v) @@ -901,8 +981,7 @@ flat_multimap(_KeyContainer, _MappedContainer, _Compare, _Allocator) _MappedContainer>; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && is_invocable_v) @@ -915,7 +994,7 @@ flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare = _ template requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && - !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) + !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>) flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Allocator) -> flat_multimap; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && - !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && - uses_allocator_v<_MappedContainer, _Allocator> && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> && + 
uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && is_invocable_v) @@ -938,19 +1016,19 @@ flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare, _A _MappedContainer>; template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_multimap(_InputIterator, _InputIterator, _Compare = _Compare()) -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_multimap(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare()) -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; template >, class _Allocator = allocator, - class = __enable_if_t::value && __is_allocator<_Allocator>::value>> + class = __enable_if_t && __is_allocator_v<_Allocator>>> flat_multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multimap< __range_key_type<_Range>, __range_mapped_type<_Range>, @@ -958,7 +1036,7 @@ flat_multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Alloc vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; -template ::value>> +template >> flat_multimap(from_range_t, _Range&&, _Allocator) -> flat_multimap< __range_key_type<_Range>, __range_mapped_type<_Range>, @@ -967,11 +1045,11 @@ flat_multimap(from_range_t, _Range&&, _Allocator) -> flat_multimap< vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, 
__range_mapped_type<_Range>>>>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_multimap(initializer_list>, _Compare = _Compare()) -> flat_multimap<_Key, _Tp, _Compare>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_multimap(sorted_equivalent_t, initializer_list>, _Compare = _Compare()) -> flat_multimap<_Key, _Tp, _Compare>; @@ -980,8 +1058,9 @@ struct uses_allocator && uses_allocator_v<_MappedContainer, _Allocator>> {}; template -_LIBCPP_HIDE_FROM_ABI typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type -erase_if(flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_multimap, _Predicate __pred) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type + erase_if(flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_multimap, _Predicate __pred) { auto __zv = ranges::views::zip(__flat_multimap.__containers_.keys, __flat_multimap.__containers_.values); auto __first = __zv.begin(); auto __last = __zv.end(); diff --git a/lib/libcxx/include/__flat_map/key_value_iterator.h b/lib/libcxx/include/__flat_map/key_value_iterator.h index d04a23d1f8..795651a079 100644 --- a/lib/libcxx/include/__flat_map/key_value_iterator.h +++ b/lib/libcxx/include/__flat_map/key_value_iterator.h @@ -20,7 +20,6 @@ #include <__type_traits/conditional.h> #include <__utility/forward.h> #include <__utility/move.h> -#include <__utility/pair.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__flat_map/utils.h b/lib/libcxx/include/__flat_map/utils.h index 3a05c71566..4b07e388d0 100644 --- a/lib/libcxx/include/__flat_map/utils.h +++ b/lib/libcxx/include/__flat_map/utils.h @@ -16,6 +16,7 @@ #include <__utility/exception_guard.h> #include <__utility/forward.h> #include 
<__utility/move.h> +#include <__vector/container_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__flat_set/flat_multiset.h b/lib/libcxx/include/__flat_set/flat_multiset.h index 44d8af05a5..b2de63bc30 100644 --- a/lib/libcxx/include/__flat_set/flat_multiset.h +++ b/lib/libcxx/include/__flat_set/flat_multiset.h @@ -13,54 +13,41 @@ #include <__algorithm/equal_range.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/lower_bound.h> -#include <__algorithm/min.h> #include <__algorithm/ranges_equal.h> #include <__algorithm/ranges_inplace_merge.h> #include <__algorithm/ranges_is_sorted.h> #include <__algorithm/ranges_sort.h> -#include <__algorithm/ranges_unique.h> #include <__algorithm/remove_if.h> #include <__algorithm/upper_bound.h> #include <__assert> #include <__compare/synth_three_way.h> -#include <__concepts/convertible_to.h> #include <__concepts/swappable.h> #include <__config> -#include <__cstddef/byte.h> -#include <__cstddef/ptrdiff_t.h> -#include <__flat_map/key_value_iterator.h> #include <__flat_map/sorted_equivalent.h> #include <__flat_set/ra_iterator.h> #include <__flat_set/utils.h> -#include <__functional/invoke.h> #include <__functional/is_transparent.h> #include <__functional/operations.h> #include <__fwd/vector.h> #include <__iterator/concepts.h> -#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/prev.h> -#include <__iterator/ranges_iterator_traits.h> #include <__iterator/reverse_iterator.h> #include <__memory/allocator_traits.h> #include <__memory/uses_allocator.h> #include <__memory/uses_allocator_construction.h> -#include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/container_compatible_range.h> #include <__ranges/drop_view.h> #include <__ranges/from_range.h> -#include <__ranges/ref_view.h> +#include <__ranges/range_adaptor.h> #include <__ranges/size.h> #include <__ranges/subrange.h> 
-#include <__ranges/zip_view.h> -#include <__type_traits/conjunction.h> #include <__type_traits/container_traits.h> #include <__type_traits/invoke.h> #include <__type_traits/is_allocator.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_same.h> -#include <__type_traits/maybe_const.h> #include <__utility/as_const.h> #include <__utility/exception_guard.h> #include <__utility/move.h> @@ -108,16 +95,16 @@ public: public: // [flat.multiset.cons], constructors - _LIBCPP_HIDE_FROM_ABI flat_multiset() noexcept(is_nothrow_default_constructible_v<_KeyContainer> && - is_nothrow_default_constructible_v<_Compare>) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset() noexcept( + is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_Compare>) : __keys_(), __compare_() {} - _LIBCPP_HIDE_FROM_ABI flat_multiset(const flat_multiset&) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(const flat_multiset&) = default; // The copy/move constructors are not specified in the spec, which means they should be defaulted. // However, the move constructor can potentially leave a moved-from object in an inconsistent // state if an exception is thrown. 
- _LIBCPP_HIDE_FROM_ABI flat_multiset(flat_multiset&& __other) noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(flat_multiset&& __other) noexcept( is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_Compare>) # if _LIBCPP_HAS_EXCEPTIONS try @@ -134,14 +121,16 @@ public: # endif // _LIBCPP_HAS_EXCEPTIONS } - _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(const key_compare& __comp) : __keys_(), __compare_(__comp) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multiset(const key_compare& __comp) + : __keys_(), __compare_(__comp) {} - _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(container_type __keys, const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multiset(container_type __keys, const key_compare& __comp = key_compare()) : __keys_(std::move(__keys)), __compare_(__comp) { ranges::sort(__keys_, __compare_); } - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(sorted_equivalent_t, container_type __keys, const key_compare& __comp = key_compare()) : __keys_(std::move(__keys)), __compare_(__comp) { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); @@ -149,7 +138,7 @@ public: template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) : __keys_(), __compare_(__comp) { insert(__first, __last); @@ -157,48 +146,53 @@ public: template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI flat_multiset( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset( sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) : __keys_(__first, __last), 
__compare_(__comp) { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t __fr, _Range&& __rg) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(from_range_t __fr, _Range&& __rg) : flat_multiset(__fr, std::forward<_Range>(__rg), key_compare()) {} template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multiset(__comp) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp) + : flat_multiset(__comp) { insert_range(std::forward<_Range>(__rg)); } - _LIBCPP_HIDE_FROM_ABI flat_multiset(initializer_list __il, const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(initializer_list __il, const key_compare& __comp = key_compare()) : flat_multiset(__il.begin(), __il.end(), __comp) {} - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(sorted_equivalent_t, initializer_list __il, const key_compare& __comp = key_compare()) : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __comp) {} template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multiset(const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() {} template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(const key_compare& __comp, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(const key_compare& __comp, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) {} template requires uses_allocator::value - 
_LIBCPP_HIDE_FROM_ABI flat_multiset(const container_type& __keys, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(const container_type& __keys, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_() { ranges::sort(__keys_, __compare_); } template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_(__comp) { ranges::sort(__keys_, __compare_); @@ -206,14 +200,15 @@ public: template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(sorted_equivalent_t, const container_type& __keys, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(sorted_equivalent_t, const container_type& __keys, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_() { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); } template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(sorted_equivalent_t, const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_(__comp) { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); @@ -221,13 +216,14 @@ public: template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(const flat_multiset& __other, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(const flat_multiset& __other, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __other.__keys_)), __compare_(__other.__compare_) {} 
template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(flat_multiset&& __other, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(flat_multiset&& __other, const _Allocator& __alloc) # if _LIBCPP_HAS_EXCEPTIONS try # endif // _LIBCPP_HAS_EXCEPTIONS @@ -243,14 +239,15 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) - _LIBCPP_HIDE_FROM_ABI flat_multiset(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() { insert(__first, __last); } template requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) { insert(__first, __last); @@ -258,7 +255,7 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __first, __last)), __compare_() { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); @@ -266,53 +263,57 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) - _LIBCPP_HIDE_FROM_ABI - flat_multiset(sorted_equivalent_t, - _InputIterator __first, - _InputIterator __last, - const key_compare& __comp, - const _Allocator& __alloc) + 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset( + sorted_equivalent_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc, __first, __last)), __compare_(__comp) { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); } template <_ContainerCompatibleRange _Range, class _Allocator> requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(from_range_t, _Range&& __rg, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() { insert_range(std::forward<_Range>(__rg)); } template <_ContainerCompatibleRange _Range, class _Allocator> requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) { insert_range(std::forward<_Range>(__rg)); } template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(initializer_list __il, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(initializer_list __il, const _Allocator& __alloc) : flat_multiset(__il.begin(), __il.end(), __alloc) {} template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) : flat_multiset(__il.begin(), __il.end(), __comp, __alloc) {} template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset(sorted_equivalent_t, initializer_list __il, 
const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_multiset(sorted_equivalent_t, initializer_list __il, const _Allocator& __alloc) : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __alloc) {} template requires uses_allocator::value - _LIBCPP_HIDE_FROM_ABI flat_multiset( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset( sorted_equivalent_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {} - _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(initializer_list __il) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset& operator=(initializer_list __il) { clear(); insert(__il); return *this; @@ -321,9 +322,9 @@ public: // copy/move assignment are not specified in the spec (defaulted) // but move assignment can potentially leave moved from object in an inconsistent // state if an exception is thrown - _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(const flat_multiset&) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset& operator=(const flat_multiset&) = default; - _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(flat_multiset&& __other) noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset& operator=(flat_multiset&& __other) noexcept( is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_Compare>) { auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); auto __clear_self_guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); @@ -334,30 +335,60 @@ public: } // iterators - _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { return iterator(std::as_const(__keys_).begin()); } - _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { return const_iterator(__keys_.begin()); } - _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { return 
iterator(std::as_const(__keys_).end()); } - _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { return const_iterator(__keys_.end()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { + return iterator(std::as_const(__keys_).begin()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { + return const_iterator(__keys_.begin()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { + return iterator(std::as_const(__keys_).end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { + return const_iterator(__keys_.end()); + } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { + return reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { + return reverse_iterator(begin()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } - _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } - _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const 
noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { + return begin(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { + return end(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(begin()); + } // capacity - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __keys_.empty(); } - _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __keys_.size(); } - _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { return __keys_.max_size(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const noexcept { + return __keys_.empty(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { + return __keys_.size(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { + return __keys_.max_size(); + } // [flat.multiset.modifiers], modifiers template requires is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace(_Args&&... __args) { if constexpr (sizeof...(__args) == 1 && (is_same_v, _Key> && ...)) { return __emplace(std::forward<_Args>(__args)...); } else { @@ -367,7 +398,7 @@ public: template requires is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __hint, _Args&&... __args) { if constexpr (sizeof...(__args) == 1 && (is_same_v, _Key> && ...)) { return __emplace_hint(std::move(__hint), std::forward<_Args>(__args)...); } else { @@ -375,21 +406,23 @@ public: } } - _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const value_type& __x) { return emplace(__x); } - _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(value_type&& __x) { + return emplace(std::move(__x)); + } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, const value_type& __x) { return emplace_hint(__hint, __x); } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, value_type&& __x) { return emplace_hint(__hint, std::move(__x)); } template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __first, _InputIterator __last) { if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { __reserve(__last - __first); } @@ -398,7 +431,8 @@ public: template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) { if constexpr (sized_sentinel_for<_InputIterator, 
_InputIterator>) { __reserve(__last - __first); } @@ -407,7 +441,7 @@ public: } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) { if constexpr (ranges::sized_range<_Range>) { __reserve(ranges::size(__range)); } @@ -415,26 +449,38 @@ public: __append_sort_merge(std::forward<_Range>(__range)); } - _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(sorted_equivalent_t, _Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } - _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list __il) { + __append_sort_merge(std::forward<_Range>(__range)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { + insert(__il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + insert(sorted_equivalent_t, initializer_list __il) { insert(sorted_equivalent, __il.begin(), __il.end()); } - _LIBCPP_HIDE_FROM_ABI container_type extract() && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && { auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); auto __ret = std::move(__keys_); return __ret; } - _LIBCPP_HIDE_FROM_ABI void replace(container_type&& __keys) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void replace(container_type&& __keys) { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys, __compare_), "Key container is not sorted"); auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); __keys_ = std::move(__keys); __guard.__complete(); } - _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __position) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); auto __key_iter = __keys_.erase(__position.__base()); __on_failure.__complete(); @@ -444,7 +490,7 @@ public: // The following overload is the same as the iterator overload // iterator erase(const_iterator __position); - _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __x) { auto [__first, __last] = equal_range(__x); auto __res = __last - __first; erase(__first, __last); @@ -454,149 +500,170 @@ public: template requires(__is_transparent_v<_Compare> && !is_convertible_v<_Kp &&, iterator> && !is_convertible_v<_Kp &&, const_iterator>) - _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(_Kp&& __x) { auto [__first, __last] = equal_range(__x); auto __res = __last - __first; erase(__first, __last); return __res; } - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __first, const_iterator __last) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); auto __key_it = __keys_.erase(__first.__base(), __last.__base()); __on_failure.__complete(); return iterator(std::move(__key_it)); } - _LIBCPP_HIDE_FROM_ABI void swap(flat_multiset& __y) noexcept { - // warning: The spec has unconditional noexcept, which means that - // if any of the following functions throw an exception, - // std::terminate will be called - // This is discussed in P3567, which hasn't been voted on yet. 
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + swap(flat_multiset& __y) noexcept(is_nothrow_swappable_v && is_nothrow_swappable_v) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { + clear() /* noexcept */; + __y.clear() /* noexcept */; + }); ranges::swap(__compare_, __y.__compare_); ranges::swap(__keys_, __y.__keys_); + __on_failure.__complete(); } - _LIBCPP_HIDE_FROM_ABI void clear() noexcept { __keys_.clear(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __keys_.clear(); } // observers - _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } - _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __compare_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { + return __compare_; + } // map operations - _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { + return __find_impl(*this, __x); + } - _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } - - template - requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { return __find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { + template + requires __is_transparent_v<_Compare> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { auto [__first, __last] = equal_range(__x); return __last - __first; } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { auto [__first, __last] = equal_range(__x); return __last - __first; } - _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } - - template - requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { return find(__x) != end(); } - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { + template + requires __is_transparent_v<_Compare> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { + return find(__x) != end(); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { const auto& __keys = __keys_; return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + lower_bound(const key_type& __x) const { return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { const auto& __keys = __keys_; return 
iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { const auto& __keys = __keys_; return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + upper_bound(const key_type& __x) const { return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { const auto& __keys = __keys_; return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) { return __equal_range_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) const { return __equal_range_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) { return __equal_range_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) const { return __equal_range_impl(*this, __x); } - friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multiset& __x, const flat_multiset& __y) { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator==(const flat_multiset& __x, const flat_multiset& __y) { return ranges::equal(__x, __y); } - friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multiset& __x, const flat_multiset& __y) { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 auto + operator<=>(const flat_multiset& __x, const flat_multiset& __y) { return std::lexicographical_compare_three_way( __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } - friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multiset& __x, flat_multiset& __y) noexcept { __x.swap(__y); } + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + swap(flat_multiset& __x, flat_multiset& __y) noexcept(noexcept(__x.swap(__y))) { + __x.swap(__y); + } private: template - _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __append_sort_merge(_Args&&... 
__args) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); size_type __old_size = size(); __flat_set_utils::__append(*this, std::forward<_Args>(__args)...); @@ -604,20 +671,20 @@ private: ranges::sort(__keys_.begin() + __old_size, __keys_.end(), __compare_); } else { _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( - ranges::is_sorted(__keys_ | ranges::views::drop(__old_size)), "Key container is not sorted"); + ranges::is_sorted(__keys_ | ranges::views::drop(__old_size), __compare_), "Key container is not sorted"); } ranges::inplace_merge(__keys_.begin(), __keys_.begin() + __old_size, __keys_.end(), __compare_); __on_failure.__complete(); } template - _LIBCPP_HIDE_FROM_ABI iterator __emplace(_Kp&& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator __emplace(_Kp&& __key) { auto __it = upper_bound(__key); return __flat_set_utils::__emplace_exact_pos(*this, __it, std::forward<_Kp>(__key)); } template - _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint(const_iterator __hint, _Kp&& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator __emplace_hint(const_iterator __hint, _Kp&& __key) { auto __prev_larger = __hint != cbegin() && __compare_(__key, *std::prev(__hint)); auto __next_smaller = __hint != cend() && __compare_(*__hint, __key); @@ -649,7 +716,7 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __find_impl(_Self&& __self, const _Kp& __key) { auto __it = __self.lower_bound(__key); auto __last = __self.end(); if (__it == __last || __self.__compare_(__key, *__it)) { @@ -659,29 +726,30 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { using __iter = _If>, const_iterator, iterator>; auto 
[__key_first, __key_last] = std::equal_range(__self.__keys_.begin(), __self.__keys_.end(), __key, __self.__compare_); return std::make_pair(__iter(__key_first), __iter(__key_last)); } - _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __reserve(size_t __size) { if constexpr (__container_traits<_KeyContainer>::__reservable) { __keys_.reserve(__size); } } template - friend typename flat_multiset<_Key2, _Compare2, _KeyContainer2>::size_type + friend typename flat_multiset<_Key2, _Compare2, _KeyContainer2>::size_type _LIBCPP_CONSTEXPR_SINCE_CXX26 erase_if(flat_multiset<_Key2, _Compare2, _KeyContainer2>&, _Predicate); _KeyContainer __keys_; _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; struct __key_equiv { - _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} - _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const_reference __x, const_reference __y) const { return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); } key_compare __comp_; @@ -689,7 +757,7 @@ private: }; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && is_invocable_v) @@ -697,12 +765,12 @@ flat_multiset(_KeyContainer, _Compare = _Compare()) -> flat_multiset; template - requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>) flat_multiset(_KeyContainer, _Allocator) -> flat_multiset, _KeyContainer>; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + 
requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && uses_allocator_v<_KeyContainer, _Allocator> && is_invocable_v flat_multiset; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && is_invocable_v) @@ -719,12 +787,12 @@ flat_multiset(sorted_equivalent_t, _KeyContainer, _Compare = _Compare()) -> flat_multiset; template - requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>) flat_multiset(sorted_equivalent_t, _KeyContainer, _Allocator) -> flat_multiset, _KeyContainer>; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && uses_allocator_v<_KeyContainer, _Allocator> && is_invocable_v flat_multiset(sorted_equivalent_t, _KeyContainer, _Compare, _Allocator) -> flat_multiset; -template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_multiset(_InputIterator, _InputIterator, _Compare = _Compare()) - -> flat_multiset<__iter_value_type<_InputIterator>, _Compare>; + -> flat_multiset<__iterator_value_type<_InputIterator>, _Compare>; -template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_multiset(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare()) - -> flat_multiset<__iter_value_type<_InputIterator>, _Compare>; + -> flat_multiset<__iterator_value_type<_InputIterator>, _Compare>; template >, class _Allocator = allocator>, - 
class = __enable_if_t::value && __is_allocator<_Allocator>::value>> + class = __enable_if_t && __is_allocator_v<_Allocator>>> flat_multiset(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multiset< ranges::range_value_t<_Range>, _Compare, vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; -template ::value>> +template >> flat_multiset(from_range_t, _Range&&, _Allocator) -> flat_multiset< ranges::range_value_t<_Range>, less>, vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_multiset(initializer_list<_Key>, _Compare = _Compare()) -> flat_multiset<_Key, _Compare>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_multiset(sorted_equivalent_t, initializer_list<_Key>, _Compare = _Compare()) -> flat_multiset<_Key, _Compare>; template @@ -770,7 +838,7 @@ struct uses_allocator, _Allocator> : bool_constant > {}; template -_LIBCPP_HIDE_FROM_ABI typename flat_multiset<_Key, _Compare, _KeyContainer>::size_type +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 typename flat_multiset<_Key, _Compare, _KeyContainer>::size_type erase_if(flat_multiset<_Key, _Compare, _KeyContainer>& __flat_multiset, _Predicate __pred) { auto __guard = std::__make_exception_guard([&] { __flat_multiset.clear(); }); auto __it = diff --git a/lib/libcxx/include/__flat_set/flat_set.h b/lib/libcxx/include/__flat_set/flat_set.h index 95cb998459..57c3926e33 100644 --- a/lib/libcxx/include/__flat_set/flat_set.h +++ b/lib/libcxx/include/__flat_set/flat_set.h @@ -12,7 +12,6 @@ #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/lower_bound.h> -#include <__algorithm/min.h> #include <__algorithm/ranges_adjacent_find.h> #include <__algorithm/ranges_equal.h> #include <__algorithm/ranges_inplace_merge.h> @@ -24,20 +23,16 @@ #include 
<__compare/synth_three_way.h> #include <__concepts/swappable.h> #include <__config> -#include <__cstddef/ptrdiff_t.h> #include <__flat_map/sorted_unique.h> #include <__flat_set/ra_iterator.h> #include <__flat_set/utils.h> -#include <__functional/invoke.h> #include <__functional/is_transparent.h> #include <__functional/operations.h> #include <__fwd/vector.h> #include <__iterator/concepts.h> -#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/next.h> #include <__iterator/prev.h> -#include <__iterator/ranges_iterator_traits.h> #include <__iterator/reverse_iterator.h> #include <__memory/allocator_traits.h> #include <__memory/uses_allocator.h> @@ -47,10 +42,7 @@ #include <__ranges/container_compatible_range.h> #include <__ranges/drop_view.h> #include <__ranges/from_range.h> -#include <__ranges/ref_view.h> #include <__ranges/size.h> -#include <__ranges/subrange.h> -#include <__type_traits/conjunction.h> #include <__type_traits/container_traits.h> #include <__type_traits/invoke.h> #include <__type_traits/is_allocator.h> @@ -347,38 +339,42 @@ public: } // iterators - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { return iterator(std::as_const(__keys_).begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { return const_iterator(__keys_.begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { return iterator(std::as_const(__keys_).end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const 
noexcept { return const_iterator(__keys_.end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { return begin(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { return end(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { + return begin(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { + return end(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { return 
const_reverse_iterator(begin()); } @@ -387,9 +383,13 @@ public: return __keys_.empty(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { return __keys_.size(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { + return __keys_.size(); + } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { return __keys_.max_size(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { + return __keys_.max_size(); + } // [flat.set.modifiers], modifiers template @@ -466,6 +466,15 @@ public: __append_sort_merge_unique(std::forward<_Range>(__range)); } + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(std::sorted_unique_t, _Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge_unique(std::forward<_Range>(__range)); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } @@ -474,7 +483,7 @@ public: insert(sorted_unique, __il.begin(), __il.end()); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && { auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); auto __ret = std::move(__keys_); return __ret; @@ -524,123 +533,131 @@ public: return iterator(std::move(__key_it)); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_set& __y) noexcept { - // warning: The spec has unconditional noexcept, which means that - // if any of the following functions throw an exception, - // std::terminate will be called. - // This is discussed in P2767, which hasn't been voted on yet. 
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + swap(flat_set& __y) noexcept(is_nothrow_swappable_v && is_nothrow_swappable_v) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { + clear() /* noexcept */; + __y.clear() /* noexcept */; + }); ranges::swap(__compare_, __y.__compare_); ranges::swap(__keys_, __y.__keys_); + __on_failure.__complete(); } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __keys_.clear(); } // observers - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { return __compare_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { + return __compare_; + } // set operations - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { return __find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { return __find_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { return 
__find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { return contains(__x) ? 1 : 0; } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { return contains(__x) ? 1 : 0; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { return find(__x) != end(); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { return find(__x) != end(); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { const auto& __keys = __keys_; return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + lower_bound(const key_type& __x) const { return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { const auto& __keys = __keys_; return 
iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { const auto& __keys = __keys_; return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const key_type& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + upper_bound(const key_type& __x) const { return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { const auto& __keys = __keys_; return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + 
equal_range(const key_type& __x) { return __equal_range_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __x) const { return __equal_range_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _Kp& __x) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) { return __equal_range_impl(*this, __x); } template requires __is_transparent_v<_Compare> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _Kp& __x) const { return __equal_range_impl(*this, __x); } @@ -655,13 +672,14 @@ public: __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } - friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_set& __x, flat_set& __y) noexcept { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + swap(flat_set& __x, flat_set& __y) noexcept(noexcept(__x.swap(__y))) { __x.swap(__y); } private: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted_and_unique(auto&& __key_container) const { - auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); }; + auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) -> bool { return !__compare_(__x, __y); }; return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container); } @@ -774,19 +792,19 @@ private: }; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && is_invocable_v) flat_set(_KeyContainer, _Compare = _Compare()) -> flat_set; template - requires(uses_allocator_v<_KeyContainer, _Allocator> 
&& !__is_allocator<_KeyContainer>::value) + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>) flat_set(_KeyContainer, _Allocator) -> flat_set, _KeyContainer>; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && uses_allocator_v<_KeyContainer, _Allocator> && is_invocable_v flat_set(_KeyContainer, _Compare, _Allocator) -> flat_set; template > - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && is_invocable_v) @@ -802,12 +820,12 @@ flat_set(sorted_unique_t, _KeyContainer, _Compare = _Compare()) -> flat_set; template - requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>) flat_set(sorted_unique_t, _KeyContainer, _Allocator) -> flat_set, _KeyContainer>; template - requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && uses_allocator_v<_KeyContainer, _Allocator> && is_invocable_v flat_set(sorted_unique_t, _KeyContainer, _Compare, _Allocator) -> flat_set; -template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>) flat_set(_InputIterator, _InputIterator, _Compare = _Compare()) - -> flat_set<__iter_value_type<_InputIterator>, _Compare>; + -> flat_set<__iterator_value_type<_InputIterator>, _Compare>; -template >> - requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +template >> + requires(__has_input_iterator_category<_InputIterator>::value && 
!__is_allocator_v<_Compare>) flat_set(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare()) - -> flat_set<__iter_value_type<_InputIterator>, _Compare>; + -> flat_set<__iterator_value_type<_InputIterator>, _Compare>; template >, class _Allocator = allocator>, - class = __enable_if_t::value && __is_allocator<_Allocator>::value>> + class = __enable_if_t && __is_allocator_v<_Allocator>>> flat_set(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_set< ranges::range_value_t<_Range>, _Compare, vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; -template ::value>> +template >> flat_set(from_range_t, _Range&&, _Allocator) -> flat_set< ranges::range_value_t<_Range>, less>, vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_set(initializer_list<_Key>, _Compare = _Compare()) -> flat_set<_Key, _Compare>; template > - requires(!__is_allocator<_Compare>::value) + requires(!__is_allocator_v<_Compare>) flat_set(sorted_unique_t, initializer_list<_Key>, _Compare = _Compare()) -> flat_set<_Key, _Compare>; template diff --git a/lib/libcxx/include/__format/concepts.h b/lib/libcxx/include/__format/concepts.h index 28297c612d..5b603701c0 100644 --- a/lib/libcxx/include/__format/concepts.h +++ b/lib/libcxx/include/__format/concepts.h @@ -15,12 +15,8 @@ #include <__config> #include <__format/format_parse_context.h> #include <__fwd/format.h> -#include <__fwd/tuple.h> -#include <__tuple/tuple_size.h> -#include <__type_traits/is_specialization.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_reference.h> -#include <__utility/pair.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -65,16 +61,6 @@ concept __formattable = # if _LIBCPP_STD_VER >= 23 template concept formattable = __formattable<_Tp, _CharT>; - -// [tuple.like] 
defines a tuple-like exposition only concept. This concept is -// not related to that. Therefore it uses a different name for the concept. -// -// TODO FMT Add a test to validate we fail when using that concept after P2165 -// has been implemented. -template -concept __fmt_pair_like = - __is_specialization_v<_Tp, pair> || (__is_specialization_v<_Tp, tuple> && tuple_size_v<_Tp> == 2); - # endif // _LIBCPP_STD_VER >= 23 #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h index f76e018df7..6da07862d4 100644 --- a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h +++ b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h @@ -61,7 +61,7 @@ #ifndef _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H #define _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H -#include <__algorithm/ranges_upper_bound.h> +#include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> #include <__iterator/access.h> @@ -1647,7 +1647,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1501] = { // size. Then the upper bound for code point 3 will return the entry after // 0x1810. After moving to the previous entry the algorithm arrives at the // correct entry. 
- ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; + ptrdiff_t __i = + std::upper_bound(std::begin(__entries), std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries; if (__i == 0) return __property::__none; diff --git a/lib/libcxx/include/__format/fmt_pair_like.h b/lib/libcxx/include/__format/fmt_pair_like.h new file mode 100644 index 0000000000..d2f2f54d5a --- /dev/null +++ b/lib/libcxx/include/__format/fmt_pair_like.h @@ -0,0 +1,42 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FORMAT_FMT_PAIR_LIKE_H +#define _LIBCPP___FORMAT_FMT_PAIR_LIKE_H + +#include <__config> +#include <__fwd/pair.h> +#include <__fwd/tuple.h> +#include <__tuple/tuple_size.h> +#include <__type_traits/is_specialization.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +// [tuple.like] defines a tuple-like exposition only concept. This concept is not related to that. Therefore it uses a +// different name for the concept. +// +// TODO FMT Add a test to validate we fail when using that concept after P2165 has been implemented. + +// [format.range.fmtkind]/2.2.1 and [tab:formatter.range.type]: +// "U is either a specialization of pair or a specialization of tuple such that tuple_size_v is 2." 
+template +concept __fmt_pair_like = + __is_specialization_v<_Tp, pair> || (__is_specialization_v<_Tp, tuple> && tuple_size_v<_Tp> == 2); + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_FMT_PAIR_LIKE_H diff --git a/lib/libcxx/include/__format/format_arg.h b/lib/libcxx/include/__format/format_arg.h index ed5e76275e..19794f0f08 100644 --- a/lib/libcxx/include/__format/format_arg.h +++ b/lib/libcxx/include/__format/format_arg.h @@ -149,7 +149,7 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_ __libcpp_unreachable(); } -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 template _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { @@ -200,7 +200,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg< __libcpp_unreachable(); } -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_STD_VER >= 26 /// Contains the values used in basic_format_arg. /// @@ -285,7 +285,7 @@ public: _LIBCPP_HIDE_FROM_ABI explicit operator bool() const noexcept { return __type_ != __format::__arg_t::__none; } -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. @@ -329,7 +329,7 @@ public: } } -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_STD_VER >= 26 private: using char_type = typename _Context::char_type; @@ -371,11 +371,8 @@ private: // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. 
template -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER -_LIBCPP_DEPRECATED_IN_CXX26 -# endif - _LIBCPP_HIDE_FROM_ABI decltype(auto) - visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { +_LIBCPP_DEPRECATED_IN_CXX26 _LIBCPP_HIDE_FROM_ABI decltype(auto) +visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { # if _LIBCPP_HAS_INT128 case __format::__arg_t::__i128: { @@ -387,7 +384,7 @@ _LIBCPP_DEPRECATED_IN_CXX26 typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__u128_}; return std::invoke(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h}); } -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_HAS_INT128 default: return std::__visit_format_arg(std::forward<_Visitor>(__vis), __arg); } diff --git a/lib/libcxx/include/__format/format_args.h b/lib/libcxx/include/__format/format_args.h index 9dd7a5ed9c..f1b648a10a 100644 --- a/lib/libcxx/include/__format/format_args.h +++ b/lib/libcxx/include/__format/format_args.h @@ -40,7 +40,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> get(size_t __id) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> get(size_t __id) const noexcept { if (__id >= __size_) return basic_format_arg<_Context>{}; diff --git a/lib/libcxx/include/__format/format_context.h b/lib/libcxx/include/__format/format_context.h index e672ee7ad0..9732ea9bf7 100644 --- a/lib/libcxx/include/__format/format_context.h +++ b/lib/libcxx/include/__format/format_context.h @@ -80,17 +80,17 @@ public: template using formatter_type = formatter<_Tp, _CharT>; - _LIBCPP_HIDE_FROM_ABI basic_format_arg arg(size_t __id) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_format_arg arg(size_t __id) const noexcept { return __args_.get(__id); } # if _LIBCPP_HAS_LOCALIZATION - _LIBCPP_HIDE_FROM_ABI std::locale locale() { + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI std::locale locale() { if (!__loc_) __loc_ = std::locale{}; return *__loc_; } # endif - _LIBCPP_HIDE_FROM_ABI iterator out() { return std::move(__out_it_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI iterator out() { return std::move(__out_it_); } _LIBCPP_HIDE_FROM_ABI void advance_to(iterator __it) { __out_it_ = std::move(__it); } private: @@ -175,13 +175,13 @@ public: __format::__determine_arg_t(), __basic_format_arg_value(__arg)}; }; -# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# if _LIBCPP_STD_VER >= 26 return static_cast<_Context*>(__c)->arg(__id).visit(std::move(__visitor)); # else _LIBCPP_SUPPRESS_DEPRECATED_PUSH return std::visit_format_arg(std::move(__visitor), static_cast<_Context*>(__c)->arg(__id)); _LIBCPP_SUPPRESS_DEPRECATED_POP -# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# endif // _LIBCPP_STD_VER >= 26 }) { } diff --git a/lib/libcxx/include/__format/format_parse_context.h b/lib/libcxx/include/__format/format_parse_context.h index 67b90c7b7e..2eda9d7f1f 100644 --- a/lib/libcxx/include/__format/format_parse_context.h +++ b/lib/libcxx/include/__format/format_parse_context.h @@ -41,8 +41,8 @@ public: basic_format_parse_context(const basic_format_parse_context&) = delete; basic_format_parse_context& operator=(const basic_format_parse_context&) = delete; - _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { return __begin_; } - _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { return __end_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { return __begin_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { return __end_; } _LIBCPP_HIDE_FROM_ABI constexpr void advance_to(const_iterator __it) { __begin_ = __it; } _LIBCPP_HIDE_FROM_ABI constexpr size_t next_arg_id() { diff --git a/lib/libcxx/include/__format/formatter_output.h 
b/lib/libcxx/include/__format/formatter_output.h index cc74e3858a..63dd7fcacd 100644 --- a/lib/libcxx/include/__format/formatter_output.h +++ b/lib/libcxx/include/__format/formatter_output.h @@ -45,7 +45,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __formatter { -struct _LIBCPP_EXPORTED_FROM_ABI __padding_size_result { +struct __padding_size_result { size_t __before_; size_t __after_; }; @@ -151,45 +151,41 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, _CharT __value) } } +template <__fmt_char_type _CharT, output_iterator _OutIt> +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { # if _LIBCPP_HAS_UNICODE -template <__fmt_char_type _CharT, output_iterator _OutIt> - requires(same_as<_CharT, char>) -_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { - std::size_t __bytes = std::countl_one(static_cast(__value.__data[0])); - if (__bytes == 0) - return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); - - for (size_t __i = 0; __i < __n; ++__i) - __out_it = __formatter::__copy( - std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it)); - return __out_it; -} + if constexpr (same_as<_CharT, char>) { + std::size_t __bytes = std::countl_one(static_cast(__value.__data[0])); + if (__bytes == 0) + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + for (size_t __i = 0; __i < __n; ++__i) + __out_it = __formatter::__copy( + std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it)); + return __out_it; # if _LIBCPP_HAS_WIDE_CHARACTERS -template <__fmt_char_type _CharT, output_iterator _OutIt> - requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) -_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { - if 
(!__unicode::__is_high_surrogate(__value.__data[0])) - return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + } else if constexpr (same_as<_CharT, wchar_t>) { + if constexpr (sizeof(wchar_t) == 2) { + if (!__unicode::__is_high_surrogate(__value.__data[0])) + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); - for (size_t __i = 0; __i < __n; ++__i) - __out_it = __formatter::__copy( - std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, std::move(__out_it)); - return __out_it; -} - -template <__fmt_char_type _CharT, output_iterator _OutIt> - requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) -_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { - return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); -} + for (size_t __i = 0; __i < __n; ++__i) + __out_it = __formatter::__copy( + std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, std::move(__out_it)); + return __out_it; + } else if constexpr (sizeof(wchar_t) == 4) { + return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); + } else { + static_assert(false, "expected sizeof(wchar_t) to be 2 or 4"); + } # endif // _LIBCPP_HAS_WIDE_CHARACTERS -# else // _LIBCPP_HAS_UNICODE -template <__fmt_char_type _CharT, output_iterator _OutIt> -_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { + } else { + static_assert(false, "Unexpected CharT"); + } +# else // _LIBCPP_HAS_UNICODE return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); +# endif // _LIBCPP_HAS_UNICODE } -# endif // _LIBCPP_HAS_UNICODE /// Writes the input to the output with the required padding. 
/// diff --git a/lib/libcxx/include/__format/indic_conjunct_break_table.h b/lib/libcxx/include/__format/indic_conjunct_break_table.h index f48ea62590..d85782d732 100644 --- a/lib/libcxx/include/__format/indic_conjunct_break_table.h +++ b/lib/libcxx/include/__format/indic_conjunct_break_table.h @@ -61,7 +61,7 @@ #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H -#include <__algorithm/ranges_upper_bound.h> +#include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> #include <__iterator/access.h> @@ -531,7 +531,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = { // size. Then the upper bound for code point 3 will return the entry after // 0x1810. After moving to the previous entry the algorithm arrives at the // correct entry. - ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; + ptrdiff_t __i = + std::upper_bound(std::begin(__entries), std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries; if (__i == 0) return __property::__none; diff --git a/lib/libcxx/include/__format/range_default_formatter.h b/lib/libcxx/include/__format/range_default_formatter.h index 7149debb2f..2d2190657b 100644 --- a/lib/libcxx/include/__format/range_default_formatter.h +++ b/lib/libcxx/include/__format/range_default_formatter.h @@ -16,10 +16,11 @@ #include <__algorithm/ranges_copy.h> #include <__chrono/statically_widen.h> -#include <__concepts/same_as.h> #include <__config> #include <__format/concepts.h> +#include <__format/fmt_pair_like.h> #include <__format/formatter.h> +#include <__format/range_format.h> #include <__format/range_formatter.h> #include <__iterator/back_insert_iterator.h> #include <__ranges/concepts.h> @@ -42,51 +43,11 @@ concept __const_formattable_range = template using __fmt_maybe_const _LIBCPP_NODEBUG = conditional_t<__const_formattable_range<_Rp, _CharT>, const _Rp, _Rp>; -_LIBCPP_DIAGNOSTIC_PUSH 
-_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wshadow") -_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wshadow") -// This shadows map, set, and string. -enum class range_format { disabled, map, set, sequence, string, debug_string }; -_LIBCPP_DIAGNOSTIC_POP - // There is no definition of this struct, it's purely intended to be used to // generate diagnostics. template struct __instantiated_the_primary_template_of_format_kind; -template -constexpr range_format format_kind = [] { - // [format.range.fmtkind]/1 - // A program that instantiates the primary template of format_kind is ill-formed. - static_assert(sizeof(_Rp) != sizeof(_Rp), "create a template specialization of format_kind for your type"); - return range_format::disabled; -}(); - -template - requires same_as<_Rp, remove_cvref_t<_Rp>> -inline constexpr range_format format_kind<_Rp> = [] { - // [format.range.fmtkind]/2 - - // 2.1 If same_as>, R> is true, - // Otherwise format_kind is range_format::disabled. - if constexpr (same_as>, _Rp>) - return range_format::disabled; - // 2.2 Otherwise, if the qualified-id R::key_type is valid and denotes a type: - else if constexpr (requires { typename _Rp::key_type; }) { - // 2.2.1 If the qualified-id R::mapped_type is valid and denotes a type ... - if constexpr (requires { typename _Rp::mapped_type; } && - // 2.2.1 ... If either U is a specialization of pair or U is a specialization - // of tuple and tuple_size_v == 2 - __fmt_pair_like>>) - return range_format::map; - else - // 2.2.2 Otherwise format_kind is range_format::set. - return range_format::set; - } else - // 2.3 Otherwise, format_kind is range_format::sequence. 
- return range_format::sequence; -}(); - template struct __range_default_formatter; diff --git a/lib/libcxx/include/__format/range_format.h b/lib/libcxx/include/__format/range_format.h new file mode 100644 index 0000000000..fe43923f9d --- /dev/null +++ b/lib/libcxx/include/__format/range_format.h @@ -0,0 +1,71 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FORMAT_RANGE_FORMAT_H +#define _LIBCPP___FORMAT_RANGE_FORMAT_H + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#include <__concepts/same_as.h> +#include <__config> +#include <__format/fmt_pair_like.h> +#include <__ranges/concepts.h> +#include <__type_traits/remove_cvref.h> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wshadow") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wshadow") +// This shadows map, set, and string. +enum class range_format { disabled, map, set, sequence, string, debug_string }; +_LIBCPP_DIAGNOSTIC_POP + +template +constexpr range_format format_kind = [] { + // [format.range.fmtkind]/1 + // A program that instantiates the primary template of format_kind is ill-formed. + static_assert(sizeof(_Rp) != sizeof(_Rp), "create a template specialization of format_kind for your type"); + return range_format::disabled; +}(); + +template + requires same_as<_Rp, remove_cvref_t<_Rp>> +inline constexpr range_format format_kind<_Rp> = [] { + // [format.range.fmtkind]/2 + + // 2.1 If same_as>, R> is true, + // Otherwise format_kind is range_format::disabled. 
+ if constexpr (same_as>, _Rp>) + return range_format::disabled; + // 2.2 Otherwise, if the qualified-id R::key_type is valid and denotes a type: + else if constexpr (requires { typename _Rp::key_type; }) { + // 2.2.1 If the qualified-id R::mapped_type is valid and denotes a type ... + if constexpr (requires { typename _Rp::mapped_type; } && + // 2.2.1 ... If either U is a specialization of pair or U is a specialization + // of tuple and tuple_size_v == 2 + __fmt_pair_like>>) + return range_format::map; + else + // 2.2.2 Otherwise format_kind is range_format::set. + return range_format::set; + } else + // 2.3 Otherwise, format_kind is range_format::sequence. + return range_format::sequence; +}(); + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +#endif diff --git a/lib/libcxx/include/__format/range_formatter.h b/lib/libcxx/include/__format/range_formatter.h index 0d7fe9970c..06d2b4cb4b 100644 --- a/lib/libcxx/include/__format/range_formatter.h +++ b/lib/libcxx/include/__format/range_formatter.h @@ -20,6 +20,7 @@ #include <__config> #include <__format/buffer.h> #include <__format/concepts.h> +#include <__format/fmt_pair_like.h> #include <__format/format_context.h> #include <__format/format_error.h> #include <__format/formatter.h> diff --git a/lib/libcxx/include/__format/width_estimation_table.h b/lib/libcxx/include/__format/width_estimation_table.h index 0ea0b4f413..ae10a77a5b 100644 --- a/lib/libcxx/include/__format/width_estimation_table.h +++ b/lib/libcxx/include/__format/width_estimation_table.h @@ -61,9 +61,10 @@ #ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H #define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H -#include <__algorithm/ranges_upper_bound.h> +#include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> +#include <__iterator/access.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -255,7 +256,8 @@ inline constexpr uint32_t __table_upper_bound = 0x0003fffd; if (__code_point < (__entries[0] 
>> 14)) return 1; - ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries; + ptrdiff_t __i = + std::upper_bound(std::begin(__entries), std::end(__entries), (__code_point << 14) | 0x3fffu) - __entries; if (__i == 0) return 1; diff --git a/lib/libcxx/include/__functional/bind.h b/lib/libcxx/include/__functional/bind.h index 596cce03cd..cbe8660b82 100644 --- a/lib/libcxx/include/__functional/bind.h +++ b/lib/libcxx/include/__functional/bind.h @@ -81,17 +81,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& __mu(reference_w return __t.get(); } -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t<_Ti&, _Uj...> -__mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>) { - return __ti(std::forward<_Uj>(std::get<_Indx>(__uj))...); -} - template ::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t<_Ti&, _Uj...> __mu(_Ti& __ti, tuple<_Uj...>& __uj) { - typedef typename __make_tuple_indices::type __indices; - return std::__mu_expand(__ti, __uj, __indices()); + return [&](__index_sequence<_Indices...>) -> __invoke_result_t<_Ti&, _Uj...> { + return __ti(std::forward<_Uj>(std::get<_Indices>(__uj))...); + }(__index_sequence_for<_Uj...>{}); } template @@ -191,7 +186,7 @@ struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true> { template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bind_return<_Fp, _BoundArgs, _Args>::type -__apply_functor(_Fp& __f, _BoundArgs& __bound_args, __tuple_indices<_Indx...>, _Args&& __args) { +__apply_functor(_Fp& __f, _BoundArgs& __bound_args, __index_sequence<_Indx...>, _Args&& __args) { return std::__invoke(__f, std::__mu(std::get<_Indx>(__bound_args), __args)...); } @@ -205,8 +200,6 @@ private: _Fd __f_; _Td __bound_args_; - typedef typename __make_tuple_indices::type __indices; - public: template < class _Gp, @@ -219,14 +212,16 @@ public: template 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bind_return<_Fd, _Td, tuple<_Args&&...> >::type operator()(_Args&&... __args) { - return std::__apply_functor(__f_, __bound_args_, __indices(), tuple<_Args&&...>(std::forward<_Args>(__args)...)); + return std::__apply_functor( + __f_, __bound_args_, __index_sequence_for<_BoundArgs...>(), tuple<_Args&&...>(std::forward<_Args>(__args)...)); } template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bind_return >::type operator()(_Args&&... __args) const { - return std::__apply_functor(__f_, __bound_args_, __indices(), tuple<_Args&&...>(std::forward<_Args>(__args)...)); + return std::__apply_functor( + __f_, __bound_args_, __index_sequence_for<_BoundArgs...>(), tuple<_Args&&...>(std::forward<_Args>(__args)...)); } }; @@ -273,14 +268,14 @@ template struct is_bind_expression<__bind_r<_Rp, _Fp, _BoundArgs...> > : public true_type {}; template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind<_Fp, _BoundArgs...> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind<_Fp, _BoundArgs...> bind(_Fp&& __f, _BoundArgs&&... __bound_args) { typedef __bind<_Fp, _BoundArgs...> type; return type(std::forward<_Fp>(__f), std::forward<_BoundArgs>(__bound_args)...); } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind_r<_Rp, _Fp, _BoundArgs...> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind_r<_Rp, _Fp, _BoundArgs...> bind(_Fp&& __f, _BoundArgs&&... 
__bound_args) { typedef __bind_r<_Rp, _Fp, _BoundArgs...> type; return type(std::forward<_Fp>(__f), std::forward<_BoundArgs>(__bound_args)...); diff --git a/lib/libcxx/include/__functional/bind_back.h b/lib/libcxx/include/__functional/bind_back.h index e44768d228..41177144d8 100644 --- a/lib/libcxx/include/__functional/bind_back.h +++ b/lib/libcxx/include/__functional/bind_back.h @@ -64,7 +64,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __bind_back(_Fn&& __f, _Args&&... __args) n # if _LIBCPP_STD_VER >= 23 template -_LIBCPP_HIDE_FROM_ABI constexpr auto bind_back(_Fn&& __f, _Args&&... __args) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto bind_back(_Fn&& __f, _Args&&... __args) { static_assert(is_constructible_v, _Fn>, "bind_back requires decay_t to be constructible from F"); static_assert(is_move_constructible_v>, "bind_back requires decay_t to be move constructible"); static_assert((is_constructible_v, _Args> && ...), diff --git a/lib/libcxx/include/__functional/bind_front.h b/lib/libcxx/include/__functional/bind_front.h index 87ef3affe8..427accf963 100644 --- a/lib/libcxx/include/__functional/bind_front.h +++ b/lib/libcxx/include/__functional/bind_front.h @@ -43,7 +43,7 @@ struct __bind_front_t : __perfect_forward<__bind_front_op, _Fn, _BoundArgs...> { template requires is_constructible_v, _Fn> && is_move_constructible_v> && (is_constructible_v, _Args> && ...) && (is_move_constructible_v> && ...) -_LIBCPP_HIDE_FROM_ABI constexpr auto bind_front(_Fn&& __f, _Args&&... __args) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto bind_front(_Fn&& __f, _Args&&... 
__args) { return __bind_front_t, decay_t<_Args>...>(std::forward<_Fn>(__f), std::forward<_Args>(__args)...); } diff --git a/lib/libcxx/include/__functional/function.h b/lib/libcxx/include/__functional/function.h index dc112ebfd0..121417f90f 100644 --- a/lib/libcxx/include/__functional/function.h +++ b/lib/libcxx/include/__functional/function.h @@ -15,16 +15,14 @@ #include <__cstddef/nullptr_t.h> #include <__exception/exception.h> #include <__functional/binary_function.h> -#include <__functional/invoke.h> #include <__functional/unary_function.h> #include <__memory/addressof.h> #include <__type_traits/aligned_storage.h> #include <__type_traits/decay.h> -#include <__type_traits/is_core_convertible.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_scalar.h> #include <__type_traits/is_trivially_constructible.h> #include <__type_traits/is_trivially_destructible.h> -#include <__type_traits/is_void.h> #include <__type_traits/strip_signature.h> #include <__utility/forward.h> #include <__utility/move.h> @@ -95,29 +93,29 @@ template struct __maybe_derive_from_binary_function<_Rp(_A1, _A2)> : public __binary_function<_A1, _A2, _Rp> {}; template -_LIBCPP_HIDE_FROM_ABI bool __not_null(_Fp const&) { - return true; +_LIBCPP_HIDE_FROM_ABI bool __is_null(_Fp const&) { + return false; } template -_LIBCPP_HIDE_FROM_ABI bool __not_null(_Fp* __ptr) { - return __ptr; +_LIBCPP_HIDE_FROM_ABI bool __is_null(_Fp* __ptr) { + return !__ptr; } template -_LIBCPP_HIDE_FROM_ABI bool __not_null(_Ret _Class::*__ptr) { - return __ptr; +_LIBCPP_HIDE_FROM_ABI bool __is_null(_Ret _Class::* __ptr) { + return !__ptr; } template -_LIBCPP_HIDE_FROM_ABI bool __not_null(function<_Fp> const& __f) { - return !!__f; +_LIBCPP_HIDE_FROM_ABI bool __is_null(function<_Fp> const& __f) { + return !__f; } # if __has_extension(blocks) template -_LIBCPP_HIDE_FROM_ABI bool __not_null(_Rp (^__p)(_Args...)) { - return __p; +_LIBCPP_HIDE_FROM_ABI bool __is_null(_Rp (^__p)(_Args...)) { + return !__p; } # endif 
@@ -206,12 +204,13 @@ public: _LIBCPP_HIDE_FROM_ABI explicit __value_func(_Fp&& __f) : __f_(nullptr) { typedef __function::__func<_Fp, _Rp(_ArgTypes...)> _Fun; - if (__function::__not_null(__f)) { - if (sizeof(_Fun) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value) { - __f_ = ::new (std::addressof(__buf_)) _Fun(std::move(__f)); - } else { - __f_ = new _Fun(std::move(__f)); - } + if (__function::__is_null(__f)) + return; + + if (sizeof(_Fun) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value) { + __f_ = ::new (std::addressof(__buf_)) _Fun(std::move(__f)); + } else { + __f_ = new _Fun(std::move(__f)); } } @@ -356,7 +355,31 @@ struct __policy { // type. template _LIBCPP_HIDE_FROM_ABI static const __policy* __create() { - return __choose_policy<_Fun>(__use_small_storage<_Fun>()); + if constexpr (__use_small_storage<_Fun>::value) { + static constexpr __policy __policy = { + nullptr, + nullptr, + false, +# if _LIBCPP_HAS_RTTI + &typeid(_Fun) +# else + nullptr +# endif + }; + return &__policy; + } else { + static constexpr __policy __policy = { + std::addressof(__large_clone<_Fun>), + std::addressof(__large_destroy<_Fun>), + false, +# if _LIBCPP_HAS_RTTI + &typeid(_Fun) +# else + nullptr +# endif + }; + return &__policy; + } } _LIBCPP_HIDE_FROM_ABI static const __policy* __create_empty() { @@ -384,36 +407,6 @@ private: _LIBCPP_HIDE_FROM_ABI static void __large_destroy(void* __s) { delete static_cast<_Fun*>(__s); } - - template - _LIBCPP_HIDE_FROM_ABI static const __policy* __choose_policy(/* is_small = */ false_type) { - static constexpr __policy __policy = { - std::addressof(__large_clone<_Fun>), - std::addressof(__large_destroy<_Fun>), - false, -# if _LIBCPP_HAS_RTTI - &typeid(_Fun) -# else - nullptr -# endif - }; - return &__policy; - } - - template - _LIBCPP_HIDE_FROM_ABI static const __policy* __choose_policy(/* is_small = */ true_type) { - static constexpr __policy __policy = { - nullptr, - nullptr, - false, -# if _LIBCPP_HAS_RTTI - 
&typeid(_Fun) -# else - nullptr -# endif - }; - return &__policy; - } }; // Used to choose between perfect forwarding or pass-by-value. Pass-by-value is @@ -455,14 +448,15 @@ public: template , __policy_func>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI explicit __policy_func(_Fp&& __f) : __policy_(__policy::__create_empty()) { - if (__function::__not_null(__f)) { - __func_ = __call_func<_Fp>; - __policy_ = __policy::__create<_Fp>(); - if (__use_small_storage<_Fp>()) { - ::new ((void*)&__buf_.__small) _Fp(std::move(__f)); - } else { - __buf_.__large = ::new _Fp(std::move(__f)); - } + if (__function::__is_null(__f)) + return; + + __func_ = __call_func<_Fp>; + __policy_ = __policy::__create<_Fp>(); + if (__use_small_storage<_Fp>()) { + ::new ((void*)&__buf_.__small) _Fp(std::move(__f)); + } else { + __buf_.__large = ::new _Fp(std::move(__f)); } } @@ -615,21 +609,9 @@ class function<_Rp(_ArgTypes...)> __func __f_; - template , function>, __is_invocable<_Fp, _ArgTypes...> >::value> - struct __callable; template - struct __callable<_Fp, true> { - static const bool value = - is_void<_Rp>::value || __is_core_convertible<__invoke_result_t<_Fp, _ArgTypes...>, _Rp>::value; - }; - template - struct __callable<_Fp, false> { - static const bool value = false; - }; - - template - using _EnableIfLValueCallable _LIBCPP_NODEBUG = __enable_if_t<__callable<_Fp&>::value>; + using _EnableIfLValueCallable _LIBCPP_NODEBUG = __enable_if_t< + _And<_IsNotSame<__remove_cvref_t<_Fp>, function>, __is_invocable_r<_Rp, _Fp&, _ArgTypes...>>::value>; public: typedef _Rp result_type; @@ -690,11 +672,11 @@ public: # if _LIBCPP_HAS_RTTI // function target access: - _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT; + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT; template - _LIBCPP_HIDE_FROM_ABI _Tp* target() _NOEXCEPT; + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp* target() _NOEXCEPT; template - _LIBCPP_HIDE_FROM_ABI const _Tp* 
target() const _NOEXCEPT; + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Tp* target() const _NOEXCEPT; # endif // _LIBCPP_HAS_RTTI }; diff --git a/lib/libcxx/include/__functional/hash.h b/lib/libcxx/include/__functional/hash.h index 83bbf1b5e2..d81ff1abbd 100644 --- a/lib/libcxx/include/__functional/hash.h +++ b/lib/libcxx/include/__functional/hash.h @@ -433,13 +433,10 @@ struct __hash_impl : __scalar_hash { template struct hash : public __hash_impl<_Tp> {}; -#if _LIBCPP_STD_VER >= 17 - template <> struct hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(nullptr_t) const _NOEXCEPT { return 662607004ull; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(nullptr_t) const _NOEXCEPT { return 662607004ull; } }; -#endif #ifndef _LIBCPP_CXX03_LANG template @@ -452,18 +449,12 @@ template > using __has_enabled_hash _LIBCPP_NODEBUG = integral_constant::value && is_default_constructible<_Hash>::value >; -# if _LIBCPP_STD_VER >= 17 template using __enable_hash_helper_imp _LIBCPP_NODEBUG = _Type; template using __enable_hash_helper _LIBCPP_NODEBUG = __enable_hash_helper_imp<_Type, __enable_if_t<__all<__has_enabled_hash<_Keys>::value...>::value> >; -# else -template -using __enable_hash_helper _LIBCPP_NODEBUG = _Type; -# endif - #endif // !_LIBCPP_CXX03_LANG _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__functional/identity.h b/lib/libcxx/include/__functional/identity.h index 1b1c6cf73c..02dde2b4f3 100644 --- a/lib/libcxx/include/__functional/identity.h +++ b/lib/libcxx/include/__functional/identity.h @@ -44,7 +44,7 @@ struct __is_identity > : true_type {}; struct identity { template - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator()(_Tp&& __t) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator()(_LIBCPP_LIFETIMEBOUND _Tp&& __t) const noexcept { return std::forward<_Tp>(__t); } diff --git a/lib/libcxx/include/__functional/is_transparent.h 
b/lib/libcxx/include/__functional/is_transparent.h index 567df1a662..c2c6fbce24 100644 --- a/lib/libcxx/include/__functional/is_transparent.h +++ b/lib/libcxx/include/__functional/is_transparent.h @@ -29,6 +29,14 @@ inline const bool __is_transparent_v<_Tp, _Key, __void_t(arg))`. +// +// This is different from `__is_transparent_v`, which is only a property of the comparator and doesn't provide +// additional semantic guarantees. +template +inline const bool __is_transparently_comparable_v = false; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FUNCTIONAL_IS_TRANSPARENT diff --git a/lib/libcxx/include/__functional/mem_fn.h b/lib/libcxx/include/__functional/mem_fn.h index 690393988c..1c9340c4f4 100644 --- a/lib/libcxx/include/__functional/mem_fn.h +++ b/lib/libcxx/include/__functional/mem_fn.h @@ -43,7 +43,8 @@ public: }; template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __mem_fn<_Rp _Tp::*> mem_fn(_Rp _Tp::*__pm) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __mem_fn<_Rp _Tp::*> +mem_fn(_Rp _Tp::* __pm) _NOEXCEPT { return __mem_fn<_Rp _Tp::*>(__pm); } diff --git a/lib/libcxx/include/__functional/operations.h b/lib/libcxx/include/__functional/operations.h index 7b0ea11db5..c0e719bb58 100644 --- a/lib/libcxx/include/__functional/operations.h +++ b/lib/libcxx/include/__functional/operations.h @@ -15,7 +15,9 @@ #include <__functional/unary_function.h> #include <__fwd/functional.h> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_generic_transparent_comparator.h> #include <__type_traits/is_integral.h> +#include <__type_traits/make_transparent.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -377,6 +379,14 @@ struct less { typedef void is_transparent; }; +template +struct __make_transparent<_Tp, less<_Tp> > { + using type _LIBCPP_NODEBUG = less<>; +}; + +template <> +inline const bool __is_generic_transparent_comparator_v> = true; + template inline const 
bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true; @@ -466,6 +476,14 @@ struct greater { template inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true; + +template +struct __make_transparent<_Tp, greater<_Tp>> { + using type _LIBCPP_NODEBUG = greater<>; +}; + +template <> +inline const bool __is_generic_transparent_comparator_v> = true; #endif // Logical operations diff --git a/lib/libcxx/include/__functional/ranges_operations.h b/lib/libcxx/include/__functional/ranges_operations.h index df95843e7c..dc9da061af 100644 --- a/lib/libcxx/include/__functional/ranges_operations.h +++ b/lib/libcxx/include/__functional/ranges_operations.h @@ -14,6 +14,7 @@ #include <__concepts/totally_ordered.h> #include <__config> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_generic_transparent_comparator.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -108,6 +109,12 @@ inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; template inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true; +template <> +inline const bool __is_generic_transparent_comparator_v = true; + +template <> +inline const bool __is_generic_transparent_comparator_v = true; + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__functional/reference_wrapper.h b/lib/libcxx/include/__functional/reference_wrapper.h index 148703b21d..b1efd9f76d 100644 --- a/lib/libcxx/include/__functional/reference_wrapper.h +++ b/lib/libcxx/include/__functional/reference_wrapper.h @@ -58,7 +58,7 @@ public: // access _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator type&() const _NOEXCEPT { return *__f_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 type& get() const _NOEXCEPT { return *__f_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 type& get() const _NOEXCEPT { return *__f_; } // invoke template @@ -128,23 
+128,25 @@ reference_wrapper(_Tp&) -> reference_wrapper<_Tp>; #endif template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp> ref(_Tp& __t) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp> ref(_Tp& __t) _NOEXCEPT { return reference_wrapper<_Tp>(__t); } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp> ref(reference_wrapper<_Tp> __t) _NOEXCEPT { return __t; } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper cref(const _Tp& __t) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper +cref(const _Tp& __t) _NOEXCEPT { return reference_wrapper(__t); } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper cref(reference_wrapper<_Tp> __t) _NOEXCEPT { return __t; } diff --git a/lib/libcxx/include/__functional/weak_result_type.h b/lib/libcxx/include/__functional/weak_result_type.h index aa462e4d5c..4232bdc69d 100644 --- a/lib/libcxx/include/__functional/weak_result_type.h +++ b/lib/libcxx/include/__functional/weak_result_type.h @@ -13,9 +13,9 @@ #include <__config> #include <__functional/binary_function.h> #include <__functional/unary_function.h> -#include <__type_traits/integral_constant.h> #include <__type_traits/invoke.h> #include <__type_traits/is_same.h> +#include <__type_traits/void_t.h> #include <__utility/declval.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -24,50 +24,36 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template -struct __has_result_type { -private: - template - static false_type __test(...); - template - static true_type __test(typename _Up::result_type* = 0); +template +inline const bool 
__has_result_type_v = false; -public: - static const bool value = decltype(__test<_Tp>(0))::value; -}; +template +inline const bool __has_result_type_v<_Tp, __void_t > = true; // __weak_result_type template struct __derives_from_unary_function { private: - struct __two { - char __lx; - char __lxx; - }; - static __two __test(...); + static void __find_base(...); template - static __unary_function<_Ap, _Rp> __test(const volatile __unary_function<_Ap, _Rp>*); + static __unary_function<_Ap, _Rp> __find_base(const volatile __unary_function<_Ap, _Rp>*); public: - static const bool value = !is_same::value; - typedef decltype(__test((_Tp*)0)) type; + using type = decltype(__find_base(static_cast<_Tp*>(nullptr))); + static const bool value = !is_same::value; }; template struct __derives_from_binary_function { private: - struct __two { - char __lx; - char __lxx; - }; - static __two __test(...); + static void __find_base(...); template - static __binary_function<_A1, _A2, _Rp> __test(const volatile __binary_function<_A1, _A2, _Rp>*); + static __binary_function<_A1, _A2, _Rp> __find_base(const volatile __binary_function<_A1, _A2, _Rp>*); public: - static const bool value = !is_same::value; - typedef decltype(__test((_Tp*)0)) type; + using type = decltype(__find_base(static_cast<_Tp*>(nullptr))); + static const bool value = !is_same::value; }; template ::value> @@ -85,7 +71,7 @@ struct __maybe_derive_from_binary_function // bool is true template struct __maybe_derive_from_binary_function<_Tp, false> {}; -template ::value> +template > struct __weak_result_type_imp // bool is true : public __maybe_derive_from_unary_function<_Tp>, public __maybe_derive_from_binary_function<_Tp> { diff --git a/lib/libcxx/include/__fwd/ios.h b/lib/libcxx/include/__fwd/ios.h index 831624f4b1..fd6738a6b3 100644 --- a/lib/libcxx/include/__fwd/ios.h +++ b/lib/libcxx/include/__fwd/ios.h @@ -31,7 +31,7 @@ using wios = basic_ios; template class _LIBCPP_PREFERRED_NAME(ios) 
_LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wios)) basic_ios; -#if defined(_NEWLIB_VERSION) +#if _LIBCPP_LIBC_NEWLIB // On newlib, off_t is 'long int' using streamoff = long int; // for char_traits in #else diff --git a/lib/libcxx/include/__fwd/tuple.h b/lib/libcxx/include/__fwd/tuple.h index fb922b29f3..dc96c03e20 100644 --- a/lib/libcxx/include/__fwd/tuple.h +++ b/lib/libcxx/include/__fwd/tuple.h @@ -21,11 +21,25 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct tuple_element; +template +using __tuple_element_t _LIBCPP_NODEBUG = typename tuple_element<_Np, _Tp>::type; + #ifndef _LIBCPP_CXX03_LANG template class tuple; +template +inline const bool __is_tuple_v = false; + +template +inline const bool __is_tuple_v> = true; + +template +struct tuple_element<_Ip, tuple<_Tp...> > { + using type _LIBCPP_NODEBUG = __type_pack_element<_Ip, _Tp...>; +}; + template struct tuple_size; diff --git a/lib/libcxx/include/__hash_table b/lib/libcxx/include/__hash_table index 78f2f3bfd2..ef487fb06d 100644 --- a/lib/libcxx/include/__hash_table +++ b/lib/libcxx/include/__hash_table @@ -10,6 +10,7 @@ #ifndef _LIBCPP___HASH_TABLE #define _LIBCPP___HASH_TABLE +#include <__algorithm/fill_n.h> #include <__algorithm/max.h> #include <__algorithm/min.h> #include <__assert> @@ -28,7 +29,6 @@ #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__new/launder.h> -#include <__type_traits/can_extract_key.h> #include <__type_traits/copy_cvref.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> @@ -44,7 +44,9 @@ #include <__utility/forward.h> #include <__utility/move.h> #include <__utility/pair.h> +#include <__utility/scope_guard.h> #include <__utility/swap.h> +#include <__utility/try_key_extraction.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -81,18 +83,6 @@ struct __hash_node_base { typedef _NodePtr __node_pointer; typedef __node_base_pointer __next_pointer; -// TODO(LLVM 22): Remove this check -#ifndef 
_LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB - static_assert(sizeof(__node_base_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) == - _LIBCPP_ALIGNOF(__node_pointer), - "It looks like you are using std::__hash_table (an implementation detail for the unordered containers) " - "with a fancy pointer type that thas a different representation depending on whether it points to a " - "__hash_table base pointer or a __hash_table node pointer (both of which are implementation details of " - "the standard library). This means that your ABI is being broken between LLVM 19 and LLVM 20. If you " - "don't care about your ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to " - "silence this diagnostic."); -#endif - __next_pointer __next_; _LIBCPP_HIDE_FROM_ABI __next_pointer __ptr() _NOEXCEPT { @@ -122,6 +112,19 @@ struct __get_hash_node_value_type<__hash_value_type<_Key, _Tp> > { template using __get_hash_node_value_type_t _LIBCPP_NODEBUG = typename __get_hash_node_value_type<_Tp>::type; +template +struct __get_hash_node_key_type { + using type _LIBCPP_NODEBUG = _Tp; +}; + +template +struct __get_hash_node_key_type<__hash_value_type<_Key, _Tp> > { + using type _LIBCPP_NODEBUG = _Key; +}; + +template +using __get_hash_node_key_type_t _LIBCPP_NODEBUG = typename __get_hash_node_key_type<_Tp>::type; + template struct __hash_node : public __hash_node_base< __rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > > { using __node_value_type _LIBCPP_NODEBUG = __get_hash_node_value_type_t<_Tp>; @@ -152,7 +155,12 @@ public: } #endif - _LIBCPP_HIDE_FROM_ABI explicit __hash_node(__next_pointer __next, size_t __hash) : _Base(__next), __hash_(__hash) {} + template + _LIBCPP_HIDE_FROM_ABI explicit __hash_node(size_t __hash, _Alloc& __na, _Args&&... 
__args) + : _Base(nullptr), __hash_(__hash) { + allocator_traits<_Alloc>::construct(__na, std::addressof(__get_value()), std::forward<_Args>(__args)...); + } + _LIBCPP_HIDE_FROM_ABI ~__hash_node() {} }; @@ -182,85 +190,16 @@ class __hash_map_iterator; template class __hash_map_const_iterator; -template -struct __hash_key_value_types { - static_assert(!is_reference<_Tp>::value && !is_const<_Tp>::value, ""); - typedef _Tp key_type; - typedef _Tp __node_value_type; - typedef _Tp __container_value_type; - static const bool __is_map = false; - - _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(_Tp const& __v) { return __v; } - _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(__node_value_type const& __v) { return __v; } - _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__node_value_type& __n) { return std::addressof(__n); } - _LIBCPP_HIDE_FROM_ABI static __container_value_type&& __move(__node_value_type& __v) { return std::move(__v); } -}; - -template -struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { - typedef _Key key_type; - typedef _Tp mapped_type; - typedef __hash_value_type<_Key, _Tp> __node_value_type; - typedef pair __container_value_type; - typedef __container_value_type __map_value_type; - static const bool __is_map = true; - - _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(__container_value_type const& __v) { return __v.first; } - - template , __node_value_type>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) { - return __t.__get_value(); - } - - template , __container_value_type>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) { - return __t; - } - - _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__container_value_type& __n) { - return std::addressof(__n); - } - _LIBCPP_HIDE_FROM_ABI static pair __move(__node_value_type& __v) { return __v.__move(); } -}; - -template , bool = 
_KVTypes::__is_map> -struct __hash_map_pointer_types {}; - -template -struct __hash_map_pointer_types<_Tp, _AllocPtr, _KVTypes, true> { - typedef typename _KVTypes::__map_value_type _Mv; - typedef __rebind_pointer_t<_AllocPtr, _Mv> __map_value_type_pointer; - typedef __rebind_pointer_t<_AllocPtr, const _Mv> __const_map_value_type_pointer; -}; - template ::element_type> struct __hash_node_types; template -struct __hash_node_types<_NodePtr, __hash_node<_Tp, _VoidPtr> > - : public __hash_key_value_types<_Tp>, - __hash_map_pointer_types<_Tp, _VoidPtr> - -{ - typedef __hash_key_value_types<_Tp> __base; - -public: - typedef ptrdiff_t difference_type; - typedef size_t size_type; - - typedef __rebind_pointer_t<_NodePtr, void> __void_pointer; - +struct __hash_node_types<_NodePtr, __hash_node<_Tp, _VoidPtr> > { typedef typename pointer_traits<_NodePtr>::element_type __node_type; - typedef _NodePtr __node_pointer; - typedef __hash_node_base<__node_pointer> __node_base_type; - typedef __rebind_pointer_t<_NodePtr, __node_base_type> __node_base_pointer; - - typedef typename __node_base_type::__next_pointer __next_pointer; + typedef typename __hash_node_base<_NodePtr>::__next_pointer __next_pointer; using __node_value_type _LIBCPP_NODEBUG = __get_hash_node_value_type_t<_Tp>; - typedef __rebind_pointer_t<_VoidPtr, __node_value_type> __node_value_type_pointer; - typedef __rebind_pointer_t<_VoidPtr, const __node_value_type> __const_node_value_type_pointer; private: static_assert(!is_const<__node_type>::value, "_NodePtr should never be a pointer to const"); @@ -281,13 +220,6 @@ struct __hash_node_types_from_iterator<__hash_local_iterator<_NodePtr> > : __has template struct __hash_node_types_from_iterator<__hash_const_local_iterator<_NodePtr> > : __hash_node_types<_NodePtr> {}; -template -struct __make_hash_node_types { - typedef __hash_node<_NodeValueTp, _VoidPtr> _NodeTp; - typedef __rebind_pointer_t<_VoidPtr, _NodeTp> _NodePtr; - typedef __hash_node_types<_NodePtr> type; -}; - 
template class __hash_iterator { typedef __hash_node_types<_NodePtr> _NodeTypes; @@ -299,9 +231,9 @@ class __hash_iterator { public: typedef forward_iterator_tag iterator_category; typedef typename _NodeTypes::__node_value_type value_type; - typedef typename _NodeTypes::difference_type difference_type; + using difference_type = ptrdiff_t; typedef value_type& reference; - typedef typename _NodeTypes::__node_value_type_pointer pointer; + using pointer = __rebind_pointer_t<_NodePtr, value_type>; _LIBCPP_HIDE_FROM_ABI __hash_iterator() _NOEXCEPT : __node_(nullptr) {} @@ -366,9 +298,9 @@ public: typedef forward_iterator_tag iterator_category; typedef typename _NodeTypes::__node_value_type value_type; - typedef typename _NodeTypes::difference_type difference_type; + using difference_type = ptrdiff_t; typedef const value_type& reference; - typedef typename _NodeTypes::__const_node_value_type_pointer pointer; + using pointer = __rebind_pointer_t<_NodePtr, const value_type>; _LIBCPP_HIDE_FROM_ABI __hash_const_iterator() _NOEXCEPT : __node_(nullptr) {} @@ -431,9 +363,9 @@ class __hash_local_iterator { public: typedef forward_iterator_tag iterator_category; typedef typename _NodeTypes::__node_value_type value_type; - typedef typename _NodeTypes::difference_type difference_type; + using difference_type = ptrdiff_t; typedef value_type& reference; - typedef typename _NodeTypes::__node_value_type_pointer pointer; + using pointer = __rebind_pointer_t<_NodePtr, value_type>; _LIBCPP_HIDE_FROM_ABI __hash_local_iterator() _NOEXCEPT : __node_(nullptr) {} @@ -509,9 +441,9 @@ public: typedef forward_iterator_tag iterator_category; typedef typename _NodeTypes::__node_value_type value_type; - typedef typename _NodeTypes::difference_type difference_type; + using difference_type = ptrdiff_t; typedef const value_type& reference; - typedef typename _NodeTypes::__const_node_value_type_pointer pointer; + using pointer = __rebind_pointer_t<_ConstNodePtr, const value_type>; _LIBCPP_HIDE_FROM_ABI 
__hash_const_local_iterator() _NOEXCEPT : __node_(nullptr) {} @@ -617,8 +549,6 @@ public: typedef typename __alloc_traits::pointer pointer; private: - typedef __hash_node_types _NodeTypes; - allocator_type& __na_; public: @@ -633,7 +563,7 @@ public: _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT { if (__value_constructed) { - __alloc_traits::destroy(__na_, _NodeTypes::__get_ptr(__p->__get_value())); + __alloc_traits::destroy(__na_, std::addressof(__p->__get_value())); std::__destroy_at(std::addressof(*__p)); } if (__p) @@ -684,18 +614,16 @@ template class __hash_table { public: using value_type = __get_hash_node_value_type_t<_Tp>; + using key_type = __get_hash_node_key_type_t<_Tp>; + typedef _Hash hasher; typedef _Equal key_equal; typedef _Alloc allocator_type; private: typedef allocator_traits __alloc_traits; - typedef typename __make_hash_node_types<_Tp, typename __alloc_traits::void_pointer>::type _NodeTypes; public: - typedef typename _NodeTypes::__node_value_type __node_value_type; - typedef typename _NodeTypes::__container_value_type __container_value_type; - typedef typename _NodeTypes::key_type key_type; typedef value_type& reference; typedef const value_type& const_reference; typedef typename __alloc_traits::pointer pointer; @@ -703,22 +631,23 @@ public: #ifndef _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE typedef typename __alloc_traits::size_type size_type; #else - typedef typename _NodeTypes::size_type size_type; + using size_type = size_t; #endif - typedef typename _NodeTypes::difference_type difference_type; + using difference_type = ptrdiff_t; public: // Create __node - typedef typename _NodeTypes::__node_type __node; - typedef __rebind_alloc<__alloc_traits, __node> __node_allocator; - typedef allocator_traits<__node_allocator> __node_traits; - typedef typename _NodeTypes::__void_pointer __void_pointer; - typedef typename _NodeTypes::__node_pointer __node_pointer; - typedef typename _NodeTypes::__node_pointer __node_const_pointer; - 
typedef typename _NodeTypes::__node_base_type __first_node; - typedef typename _NodeTypes::__node_base_pointer __node_base_pointer; - typedef typename _NodeTypes::__next_pointer __next_pointer; + using __void_pointer _LIBCPP_NODEBUG = typename __alloc_traits::void_pointer; + + using __node _LIBCPP_NODEBUG = __hash_node<_Tp, __void_pointer>; + using __node_allocator _LIBCPP_NODEBUG = __rebind_alloc<__alloc_traits, __node>; + using __node_traits _LIBCPP_NODEBUG = allocator_traits<__node_allocator>; + using __node_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<__void_pointer, __node>; + + using __first_node _LIBCPP_NODEBUG = __hash_node_base<__node_pointer>; + using __node_base_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<__void_pointer, __first_node>; + using __next_pointer _LIBCPP_NODEBUG = __node_base_pointer; private: // check for sane allocator pointer rebinding semantics. Rebinding the @@ -747,6 +676,38 @@ private: _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __size_; } + _LIBCPP_HIDE_FROM_ABI void + __copy_construct(__next_pointer __other_iter, __next_pointer __own_iter, size_t __current_chash) { + auto __bucket_count = bucket_count(); + + for (; __other_iter; __other_iter = __other_iter->__next_) { + __node_holder __new_node = __construct_node_hash(__other_iter->__hash(), __other_iter->__upcast()->__get_value()); + + size_t __new_chash = std::__constrain_hash(__new_node->__hash(), __bucket_count); + if (__new_chash != __current_chash) { + __bucket_list_[__new_chash] = __own_iter; + __current_chash = __new_chash; + } + + __own_iter->__next_ = static_cast<__next_pointer>(__new_node.release()); + __own_iter = __own_iter->__next_; + } + } + + _LIBCPP_HIDE_FROM_ABI void __copy_construct(__next_pointer __other_iter) { + __next_pointer __own_iter = __first_node_.__ptr(); + { + __node_holder __new_node = __construct_node_hash(__other_iter->__hash(), __other_iter->__upcast()->__get_value()); + __own_iter->__next_ = 
static_cast<__next_pointer>(__new_node.release()); + } + + size_t __current_chash = std::__constrain_hash(__own_iter->__next_->__hash(), bucket_count()); + __bucket_list_[__current_chash] = __own_iter; + __other_iter = __other_iter->__next_; + __own_iter = __own_iter->__next_; + __copy_construct(__other_iter, __own_iter, __current_chash); + } + public: _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } @@ -811,40 +772,66 @@ public: _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(__node_pointer __nd); _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(const_iterator __p, __node_pointer __nd); - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_key_args(_Key const& __k, _Args&&... __args); - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_impl(_Args&&... __args); - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique(_Pp&& __x) { - return __emplace_unique_extract_key(std::forward<_Pp>(__x), __can_extract_key<_Pp, key_type>()); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique(_First&& __f, _Second&& __s) { - return __emplace_unique_key_args(__f, std::forward<_First>(__f), std::forward<_Second>(__s)); - } - template _LIBCPP_HIDE_FROM_ABI pair __emplace_unique(_Args&&... __args) { - return __emplace_unique_impl(std::forward<_Args>(__args)...); - } - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_extract_key(_Pp&& __x, __extract_key_fail_tag) { - return __emplace_unique_impl(std::forward<_Pp>(__x)); - } - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_extract_key(_Pp&& __x, __extract_key_self_tag) { - return __emplace_unique_key_args(__x, std::forward<_Pp>(__x)); - } - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_extract_key(_Pp&& __x, __extract_key_first_tag) { - return __emplace_unique_key_args(__x.first, std::forward<_Pp>(__x)); + return std::__try_key_extraction( + [this](const key_type& __key, _Args&&... 
__args2) { + size_t __hash = hash_function()(__key); + size_type __bc = bucket_count(); + bool __inserted = false; + __next_pointer __nd; + size_t __chash; + if (__bc != 0) { + __chash = std::__constrain_hash(__hash, __bc); + __nd = __bucket_list_[__chash]; + if (__nd != nullptr) { + for (__nd = __nd->__next_; + __nd != nullptr && + (__nd->__hash() == __hash || std::__constrain_hash(__nd->__hash(), __bc) == __chash); + __nd = __nd->__next_) { + if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__get_value(), __key)) + goto __done; + } + } + } + { + __node_holder __h = __construct_node_hash(__hash, std::forward<_Args>(__args2)...); + if (size() + 1 > __bc * max_load_factor()) { + __rehash_unique(std::max(2 * __bc + !std::__is_hash_power2(__bc), + size_type(__math::ceil(float(size() + 1) / max_load_factor())))); + __bc = bucket_count(); + __chash = std::__constrain_hash(__hash, __bc); + } + // insert_after __bucket_list_[__chash], or __first_node if bucket is null + __next_pointer __pn = __bucket_list_[__chash]; + if (__pn == nullptr) { + __pn = __first_node_.__ptr(); + __h->__next_ = __pn->__next_; + __pn->__next_ = __h.get()->__ptr(); + // fix up __bucket_list_ + __bucket_list_[__chash] = __pn; + if (__h->__next_ != nullptr) + __bucket_list_[std::__constrain_hash(__h->__next_->__hash(), __bc)] = __h.get()->__ptr(); + } else { + __h->__next_ = __pn->__next_; + __pn->__next_ = static_cast<__next_pointer>(__h.get()); + } + __nd = static_cast<__next_pointer>(__h.release()); + // increment size + ++size(); + __inserted = true; + } + __done: + return pair(iterator(__nd), __inserted); + }, + [this](_Args&&... 
__args2) { + __node_holder __h = __construct_node(std::forward<_Args>(__args2)...); + pair __r = __node_insert_unique(__h.get()); + if (__r.second) + __h.release(); + return __r; + }, + std::forward<_Args>(__args)...); } template @@ -854,9 +841,7 @@ public: template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(value_type&& __value) { - using __key_type = typename _NodeTypes::key_type; - - __node_holder __h = __construct_node(const_cast<__key_type&&>(__value.first), std::move(__value.second)); + __node_holder __h = __construct_node(const_cast(__value.first), std::move(__value.second)); __node_insert_unique(__h.get()); __h.release(); } @@ -870,9 +855,7 @@ public: template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(value_type&& __value) { - using __key_type = typename _NodeTypes::key_type; - - __node_holder __h = __construct_node(const_cast<__key_type&&>(__value.first), std::move(__value.second)); + __node_holder __h = __construct_node(const_cast(__value.first), std::move(__value.second)); __node_insert_multi(__h.get()); __h.release(); } @@ -1017,8 +1000,8 @@ private: template _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args); - template - _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_hash(size_t __hash, _First&& __f, _Rest&&... __rest); + template + _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_hash(size_t __hash, _Args&&... 
__args); _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __hash_table& __u) { __copy_assign_alloc(__u, integral_constant()); @@ -1042,17 +1025,29 @@ private: } _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {} - _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__node_pointer __nd) _NOEXCEPT { + auto& __alloc = __node_alloc(); + __node_traits::destroy(__alloc, std::addressof(__nd->__get_value())); + std::__destroy_at(std::__to_address(__nd)); + __node_traits::deallocate(__alloc, __nd, 1); + } + + _LIBCPP_HIDE_FROM_ABI void __deallocate_node_list(__next_pointer __np) _NOEXCEPT { + while (__np != nullptr) { + __next_pointer __next = __np->__next_; + __deallocate_node(__np->__upcast()); + __np = __next; + } + } + _LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT; template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI void __assign_value(__get_hash_node_value_type_t<_Tp>& __lhs, _From&& __rhs) { - using __key_type = typename _NodeTypes::key_type; - // This is technically UB, since the object was constructed as `const`. // Clang doesn't optimize on this currently though. 
- const_cast<__key_type&>(__lhs.first) = const_cast<__copy_cvref_t<_From, __key_type>&&>(__rhs.first); - __lhs.second = std::forward<_From>(__rhs).second; + const_cast(__lhs.first) = const_cast<__copy_cvref_t<_From, key_type>&&>(__rhs.first); + __lhs.second = std::forward<_From>(__rhs).second; } template ::value, int> = 0> @@ -1101,16 +1096,29 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const allocator_type& __a __max_load_factor_(1.0f) {} template -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u) +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __other) : __bucket_list_(nullptr, - __bucket_list_deleter(allocator_traits<__pointer_allocator>::select_on_container_copy_construction( - __u.__bucket_list_.get_deleter().__alloc()), + __bucket_list_deleter(__pointer_alloc_traits::select_on_container_copy_construction( + __other.__bucket_list_.get_deleter().__alloc()), 0)), - __node_alloc_(allocator_traits<__node_allocator>::select_on_container_copy_construction(__u.__node_alloc())), + __node_alloc_(__node_traits::select_on_container_copy_construction(__other.__node_alloc())), __size_(0), - __hasher_(__u.hash_function()), - __max_load_factor_(__u.__max_load_factor_), - __key_eq_(__u.__key_eq_) {} + __hasher_(__other.hash_function()), + __max_load_factor_(__other.__max_load_factor_), + __key_eq_(__other.__key_eq_) { + if (__other.size() == 0) + return; + + auto& __bucket_list_del = __bucket_list_.get_deleter(); + auto __bucket_count = __other.bucket_count(); + __bucket_list_.reset(__pointer_alloc_traits::allocate(__bucket_list_del.__alloc(), __bucket_count)); + __bucket_list_del.size() = __bucket_count; + + std::fill_n(__bucket_list_.get(), __bucket_count, nullptr); + + __copy_construct(__other.__first_node_.__next_); + __size_ = __other.size(); +} template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u, const allocator_type& __a) @@ -1169,7 +1177,7 @@ __hash_table<_Tp, 
_Hash, _Equal, _Alloc>::~__hash_table() { static_assert(is_copy_constructible::value, "Hasher must be copy-constructible."); #endif - __deallocate_node(__first_node_.__next_); + __deallocate_node_list(__first_node_.__next_); } template @@ -1184,28 +1192,76 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__copy_assign_alloc(const __hash_ } template -__hash_table<_Tp, _Hash, _Equal, _Alloc>& __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __u) { - if (this != std::addressof(__u)) { - __copy_assign_alloc(__u); - hash_function() = __u.hash_function(); - key_eq() = __u.key_eq(); - max_load_factor() = __u.max_load_factor(); - __assign_multi(__u.begin(), __u.end()); - } - return *this; -} +__hash_table<_Tp, _Hash, _Equal, _Alloc>& +__hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other) { + if (this == std::addressof(__other)) + return *this; -template -void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) _NOEXCEPT { - __node_allocator& __na = __node_alloc(); - while (__np != nullptr) { - __next_pointer __next = __np->__next_; - __node_pointer __real_np = __np->__upcast(); - __node_traits::destroy(__na, _NodeTypes::__get_ptr(__real_np->__get_value())); - std::__destroy_at(std::addressof(*__real_np)); - __node_traits::deallocate(__na, __real_np, 1); - __np = __next; + __copy_assign_alloc(__other); + hash_function() = __other.hash_function(); + key_eq() = __other.key_eq(); + max_load_factor() = __other.max_load_factor(); + + if (__other.size() == 0) { + clear(); + return *this; } + + auto __bucket_count = __other.bucket_count(); + if (__bucket_count != bucket_count()) { + auto& __bucket_list_del = __bucket_list_.get_deleter(); + __bucket_list_.reset(__pointer_alloc_traits::allocate(__bucket_list_del.__alloc(), __bucket_count)); + __bucket_list_del.size() = __bucket_count; + } + std::fill_n(__bucket_list_.get(), __bucket_count, nullptr); + + if (!__first_node_.__next_) { + 
__copy_construct(__other.__first_node_.__next_); + __size_ = __other.size(); + return *this; + } + + __next_pointer __other_iter = __other.__first_node_.__next_; + __next_pointer __own_iter = __first_node_.__ptr(); + { + __node_pointer __next = __own_iter->__next_->__upcast(); + __assign_value(__next->__get_value(), __other_iter->__upcast()->__get_value()); + __next->__hash_ = __other_iter->__hash(); + } + size_t __current_chash = std::__constrain_hash(__own_iter->__next_->__hash(), __bucket_count); + __bucket_list_[__current_chash] = __own_iter; + __other_iter = __other_iter->__next_; + __own_iter = __own_iter->__next_; + + // Go through the nodes of the incoming hash table and copy then into the destination hash table, reusing as many + // existing nodes as posssible in the destination. + while (__other_iter && __own_iter->__next_) { + __node_pointer __next = __own_iter->__next_->__upcast(); + __assign_value(__next->__get_value(), __other_iter->__upcast()->__get_value()); + __next->__hash_ = __other_iter->__hash(); + + size_t __new_chash = std::__constrain_hash(__next->__hash_, __bucket_count); + if (__new_chash != __current_chash) { + __bucket_list_[__new_chash] = __own_iter; + __current_chash = __new_chash; + } + + __other_iter = __other_iter->__next_; + __own_iter = __own_iter->__next_; + } + + // At this point we either have consumed the whole incoming hash table, or we don't have any more nodes to reuse in + // the destination. Either continue with constructing new nodes, or deallocate the left over nodes. 
+ if (__own_iter->__next_) { + __deallocate_node_list(__own_iter->__next_); + __own_iter->__next_ = nullptr; + } else { + __copy_construct(__other_iter, __own_iter, __current_chash); + } + + __size_ = __other.size(); + + return *this; } template @@ -1251,23 +1307,14 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, max_load_factor() = __u.max_load_factor(); if (bucket_count() != 0) { __next_pointer __cache = __detach(); -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - const_iterator __i = __u.begin(); - while (__cache != nullptr && __u.size() != 0) { - __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value())); - __next_pointer __next = __cache->__next_; - __node_insert_multi(__cache->__upcast()); - __cache = __next; - } -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - __deallocate_node(__cache); - throw; + auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); }); + const_iterator __i = __u.begin(); + while (__cache != nullptr && __u.size() != 0) { + __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value())); + __next_pointer __next = __cache->__next_; + __node_insert_multi(__cache->__upcast()); + __cache = __next; } -#endif // _LIBCPP_HAS_EXCEPTIONS - __deallocate_node(__cache); } const_iterator __i = __u.begin(); while (__u.size() != 0) @@ -1290,27 +1337,18 @@ template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __first, _InputIterator __last) { typedef iterator_traits<_InputIterator> _ITraits; typedef typename _ITraits::value_type _ItValueType; - static_assert(is_same<_ItValueType, __container_value_type>::value, - "__assign_unique may only be called with the containers value type"); + static_assert( + is_same<_ItValueType, value_type>::value, "__assign_unique may only be called with the containers value type"); if (bucket_count() != 0) { __next_pointer __cache = __detach(); 
-#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - for (; __cache != nullptr && __first != __last; ++__first) { - __assign_value(__cache->__upcast()->__get_value(), *__first); - __next_pointer __next = __cache->__next_; - __node_insert_unique(__cache->__upcast()); - __cache = __next; - } -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - __deallocate_node(__cache); - throw; + auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); }); + for (; __cache != nullptr && __first != __last; ++__first) { + __assign_value(__cache->__upcast()->__get_value(), *__first); + __next_pointer __next = __cache->__next_; + __node_insert_unique(__cache->__upcast()); + __cache = __next; } -#endif // _LIBCPP_HAS_EXCEPTIONS - __deallocate_node(__cache); } for (; __first != __last; ++__first) __emplace_unique(*__first); @@ -1321,31 +1359,20 @@ template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __first, _InputIterator __last) { typedef iterator_traits<_InputIterator> _ITraits; typedef typename _ITraits::value_type _ItValueType; - static_assert( - (is_same<_ItValueType, __container_value_type>::value || is_same<_ItValueType, __node_value_type>::value), - "__assign_multi may only be called with the containers value type" - " or the nodes value type"); + static_assert(is_same<_ItValueType, value_type>::value, + "__assign_multi may only be called with the containers value type or the nodes value type"); if (bucket_count() != 0) { __next_pointer __cache = __detach(); -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - for (; __cache != nullptr && __first != __last; ++__first) { - __assign_value(__cache->__upcast()->__get_value(), *__first); - __next_pointer __next = __cache->__next_; - __node_insert_multi(__cache->__upcast()); - __cache = __next; - } -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - __deallocate_node(__cache); - throw; + auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); }); + for (; __cache != nullptr && __first != __last; ++__first) { + __assign_value(__cache->__upcast()->__get_value(), *__first); + __next_pointer __next = __cache->__next_; + __node_insert_multi(__cache->__upcast()); + __cache = __next; } -#endif // _LIBCPP_HAS_EXCEPTIONS - __deallocate_node(__cache); } for (; __first != __last; ++__first) - __emplace_multi(_NodeTypes::__get_value(*__first)); + __emplace_multi(*__first); } template @@ -1375,7 +1402,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT { template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT { if (size() > 0) { - __deallocate_node(__first_node_.__next_); + __deallocate_node_list(__first_node_.__next_); __first_node_.__next_ = nullptr; size_type __bc = bucket_count(); for (size_type __i = 0; __i < __bc; ++__i) @@ -1561,69 +1588,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(const_iterator __p return __node_insert_multi(__cp); } -template -template -pair::iterator, bool> -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __k, _Args&&... 
__args) { - size_t __hash = hash_function()(__k); - size_type __bc = bucket_count(); - bool __inserted = false; - __next_pointer __nd; - size_t __chash; - if (__bc != 0) { - __chash = std::__constrain_hash(__hash, __bc); - __nd = __bucket_list_[__chash]; - if (__nd != nullptr) { - for (__nd = __nd->__next_; - __nd != nullptr && (__nd->__hash() == __hash || std::__constrain_hash(__nd->__hash(), __bc) == __chash); - __nd = __nd->__next_) { - if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__get_value(), __k)) - goto __done; - } - } - } - { - __node_holder __h = __construct_node_hash(__hash, std::forward<_Args>(__args)...); - if (size() + 1 > __bc * max_load_factor() || __bc == 0) { - __rehash_unique(std::max( - 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor())))); - __bc = bucket_count(); - __chash = std::__constrain_hash(__hash, __bc); - } - // insert_after __bucket_list_[__chash], or __first_node if bucket is null - __next_pointer __pn = __bucket_list_[__chash]; - if (__pn == nullptr) { - __pn = __first_node_.__ptr(); - __h->__next_ = __pn->__next_; - __pn->__next_ = __h.get()->__ptr(); - // fix up __bucket_list_ - __bucket_list_[__chash] = __pn; - if (__h->__next_ != nullptr) - __bucket_list_[std::__constrain_hash(__h->__next_->__hash(), __bc)] = __h.get()->__ptr(); - } else { - __h->__next_ = __pn->__next_; - __pn->__next_ = static_cast<__next_pointer>(__h.get()); - } - __nd = static_cast<__next_pointer>(__h.release()); - // increment size - ++size(); - __inserted = true; - } -__done: - return pair(iterator(__nd), __inserted); -} - -template -template -pair::iterator, bool> -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_impl(_Args&&... 
__args) { - __node_holder __h = __construct_node(std::forward<_Args>(__args)...); - pair __r = __node_insert_unique(__h.get()); - if (__r.second) - __h.release(); - return __r; -} - template template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator @@ -1764,41 +1728,45 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D template template -void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) { - __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc(); - __bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr); - __bucket_list_.get_deleter().size() = __nbc; - if (__nbc > 0) { - for (size_type __i = 0; __i < __nbc; ++__i) - __bucket_list_[__i] = nullptr; - __next_pointer __pp = __first_node_.__ptr(); - __next_pointer __cp = __pp->__next_; - if (__cp != nullptr) { - size_type __chash = std::__constrain_hash(__cp->__hash(), __nbc); - __bucket_list_[__chash] = __pp; - size_type __phash = __chash; - for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr; __cp = __pp->__next_) { - __chash = std::__constrain_hash(__cp->__hash(), __nbc); - if (__chash == __phash) - __pp = __cp; - else { - if (__bucket_list_[__chash] == nullptr) { - __bucket_list_[__chash] = __pp; - __pp = __cp; - __phash = __chash; - } else { - __next_pointer __np = __cp; - if _LIBCPP_CONSTEXPR_SINCE_CXX17 (!_UniqueKeys) { - for (; __np->__next_ != nullptr && - key_eq()(__cp->__upcast()->__get_value(), __np->__next_->__upcast()->__get_value()); - __np = __np->__next_) - ; - } - __pp->__next_ = __np->__next_; - __np->__next_ = __bucket_list_[__chash]->__next_; - __bucket_list_[__chash]->__next_ = __cp; - } +void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __bucket_count) { + __pointer_allocator& __ptr_alloc = __bucket_list_.get_deleter().__alloc(); + __bucket_list_.reset(__bucket_count > 0 ? 
__pointer_alloc_traits::allocate(__ptr_alloc, __bucket_count) : nullptr); + __bucket_list_.get_deleter().size() = __bucket_count; + + if (__bucket_count == 0) + return; + + for (size_type __i = 0; __i < __bucket_count; ++__i) + __bucket_list_[__i] = nullptr; + __next_pointer __pp = __first_node_.__ptr(); + __next_pointer __cp = __pp->__next_; + + if (!__cp) + return; + + size_type __chash = std::__constrain_hash(__cp->__hash(), __bucket_count); + __bucket_list_[__chash] = __pp; + size_type __phash = __chash; + for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr; __cp = __pp->__next_) { + __chash = std::__constrain_hash(__cp->__hash(), __bucket_count); + if (__chash == __phash) + __pp = __cp; + else { + if (__bucket_list_[__chash] == nullptr) { + __bucket_list_[__chash] = __pp; + __pp = __cp; + __phash = __chash; + } else { + __next_pointer __np = __cp; + if _LIBCPP_CONSTEXPR (!_UniqueKeys) { + for (; __np->__next_ != nullptr && + key_eq()(__cp->__upcast()->__get_value(), __np->__next_->__upcast()->__get_value()); + __np = __np->__next_) + ; } + __pp->__next_ = __np->__next_; + __np->__next_ = __bucket_list_[__chash]->__next_; + __bucket_list_[__chash]->__next_ = __cp; } } } @@ -1854,16 +1822,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(_Args&&... __args) { __node_allocator& __na = __node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - // Begin the lifetime of the node itself. Note that this doesn't begin the lifetime of the value - // held inside the node, since we need to use the allocator's construct() method for that. + // Begin the lifetime of the node itself and the value_type contained within. // // We don't use the allocator's construct() method to construct the node itself since the // Cpp17FooInsertable named requirements don't require the allocator's construct() method // to work on anything other than the value_type. 
- std::__construct_at(std::addressof(*__h), /* next = */ nullptr, /* hash = */ 0); + std::__construct_at(std::addressof(*__h), /* hash = */ 0, __na, std::forward<_Args>(__args)...); - // Now construct the value_type using the allocator's construct() method. - __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__get_value()), std::forward<_Args>(__args)...); __h.get_deleter().__value_constructed = true; __h->__hash_ = hash_function()(__h->__get_value()); @@ -1871,15 +1836,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(_Args&&... __args) { } template -template +template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(size_t __hash, _First&& __f, _Rest&&... __rest) { - static_assert(!__is_hash_value_type<_First, _Rest...>::value, "Construct cannot be called with a hash value type"); +__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(size_t __hash, _Args&&... __args) { + static_assert(!__is_hash_value_type<_Args...>::value, "Construct cannot be called with a hash value type"); __node_allocator& __na = __node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - std::__construct_at(std::addressof(*__h), /* next = */ nullptr, /* hash = */ __hash); - __node_traits::construct( - __na, _NodeTypes::__get_ptr(__h->__get_value()), std::forward<_First>(__f), std::forward<_Rest>(__rest)...); + std::__construct_at(std::addressof(*__h), /* hash = */ __hash, __na, std::forward<_Args>(__args)...); __h.get_deleter().__value_constructed = true; return __h; } @@ -1899,12 +1862,63 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { - for (const_iterator __p = __first; __first != __last; __p = __first) { - ++__first; - erase(__p); + if (__first == __last) + return 
iterator(__last.__node_); + + // current node + __next_pointer __current = __first.__node_; + size_type __bucket_count = bucket_count(); + size_t __chash = std::__constrain_hash(__current->__hash(), __bucket_count); + // find previous node + __next_pointer __before_first = __bucket_list_[__chash]; + for (; __before_first->__next_ != __current; __before_first = __before_first->__next_) + ; + + __next_pointer __last_node = __last.__node_; + + // If __before_first is in the same bucket (i.e. the first element we erase is not the first in the bucket), clear + // this bucket first without re-linking it + if (__before_first != __first_node_.__ptr() && + std::__constrain_hash(__before_first->__hash(), __bucket_count) == __chash) { + while (__current != __last_node) { + auto __next = __current->__next_; + __deallocate_node(__current->__upcast()); + __current = __next; + --__size_; + + if (__next) { + if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) { + __bucket_list_[__next_chash] = __before_first; + __chash = __next_chash; + break; + } + } + } } - __next_pointer __np = __last.__node_; - return iterator(__np); + + while (__current != __last_node) { + auto __next = __current->__next_; + __deallocate_node(__current->__upcast()); + __current = __next; + --__size_; + + // When switching buckets, set the old bucket to be empty and update the next bucket to have __before_first as its + // before-first element + if (__next) { + if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) { + __bucket_list_[__chash] = nullptr; + __bucket_list_[__next_chash] = __before_first; + __chash = __next_chash; + } + } else { // When __next is a nullptr we've fully erased the last bucket. Update the bucket list accordingly. 
+ __bucket_list_[__chash] = nullptr; + } + } + + // re-link __before_first with __last + __before_first->__next_ = __current; + + return iterator(__last.__node_); } template diff --git a/lib/libcxx/include/__ios/fpos.h b/lib/libcxx/include/__ios/fpos.h index e5c21b4391..af114421c8 100644 --- a/lib/libcxx/include/__ios/fpos.h +++ b/lib/libcxx/include/__ios/fpos.h @@ -30,7 +30,7 @@ public: _LIBCPP_HIDE_FROM_ABI operator streamoff() const { return __off_; } - _LIBCPP_HIDE_FROM_ABI _StateT state() const { return __st_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _StateT state() const { return __st_; } _LIBCPP_HIDE_FROM_ABI void state(_StateT __st) { __st_ = __st; } _LIBCPP_HIDE_FROM_ABI fpos& operator+=(streamoff __off) { @@ -38,7 +38,7 @@ public: return *this; } - _LIBCPP_HIDE_FROM_ABI fpos operator+(streamoff __off) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI fpos operator+(streamoff __off) const { fpos __t(*this); __t += __off; return __t; @@ -49,7 +49,7 @@ public: return *this; } - _LIBCPP_HIDE_FROM_ABI fpos operator-(streamoff __off) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI fpos operator-(streamoff __off) const { fpos __t(*this); __t -= __off; return __t; @@ -57,7 +57,7 @@ public: }; template -inline _LIBCPP_HIDE_FROM_ABI streamoff operator-(const fpos<_StateT>& __x, const fpos<_StateT>& __y) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI streamoff operator-(const fpos<_StateT>& __x, const fpos<_StateT>& __y) { return streamoff(__x) - streamoff(__y); } diff --git a/lib/libcxx/include/__iterator/back_insert_iterator.h b/lib/libcxx/include/__iterator/back_insert_iterator.h index 3a11fae4cb..d051c08751 100644 --- a/lib/libcxx/include/__iterator/back_insert_iterator.h +++ b/lib/libcxx/include/__iterator/back_insert_iterator.h @@ -26,15 +26,9 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class back_insert_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator 
-#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, output_iterator_tag, void, void, void, void> { protected: _Container* container; diff --git a/lib/libcxx/include/__iterator/bounded_iter.h b/lib/libcxx/include/__iterator/bounded_iter.h index d12750d1f8..d2a0906112 100644 --- a/lib/libcxx/include/__iterator/bounded_iter.h +++ b/lib/libcxx/include/__iterator/bounded_iter.h @@ -74,12 +74,12 @@ struct __bounded_iter { _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter const&) = default; _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter&&) = default; - template < class _OtherIterator, - __enable_if_t< - _And< is_convertible, - _Or >, - is_same > > > >::value, - int> = 0> + template , + _Or >, + is_same > > > >::value, + int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __bounded_iter(__bounded_iter<_OtherIterator> const& __other) _NOEXCEPT : __current_(__other.__current_), __begin_(__other.__begin_), @@ -116,8 +116,7 @@ public: // These operations check that the iterator is dereferenceable. Since the class invariant is // that the iterator is always within `[begin, end]`, we only need to check it's not pointing to // `end`. This is easier for the optimizer because it aligns with the `iter != container.end()` - // checks that typical callers already use (see - // https://github.com/llvm/llvm-project/issues/78829). + // checks that typical callers already use (see https://llvm.org/PR78829). 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __current_ != __end_, "__bounded_iter::operator*: Attempt to dereference an iterator at the end"); diff --git a/lib/libcxx/include/__iterator/concepts.h b/lib/libcxx/include/__iterator/concepts.h index 20a1ab4691..3b43920443 100644 --- a/lib/libcxx/include/__iterator/concepts.h +++ b/lib/libcxx/include/__iterator/concepts.h @@ -117,15 +117,12 @@ template concept __signed_integer_like = signed_integral<_Tp>; template -concept weakly_incrementable = - // TODO: remove this once the clang bug is fixed (bugs.llvm.org/PR48173). - !same_as<_Ip, bool> && // Currently, clang does not handle bool correctly. - movable<_Ip> && requires(_Ip __i) { - typename iter_difference_t<_Ip>; - requires __signed_integer_like>; - { ++__i } -> same_as<_Ip&>; // not required to be equality-preserving - __i++; // not required to be equality-preserving - }; +concept weakly_incrementable = movable<_Ip> && requires(_Ip __i) { + typename iter_difference_t<_Ip>; + requires __signed_integer_like>; + { ++__i } -> same_as<_Ip&>; // not required to be equality-preserving + __i++; // not required to be equality-preserving +}; // [iterator.concept.inc] template diff --git a/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h b/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h index ba3536b686..ecd30d8e11 100644 --- a/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h +++ b/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h @@ -68,7 +68,8 @@ concept __cpp17_default_constructible = is_default_constructible_v<_Tp>; template concept __cpp17_iterator = __cpp17_copy_constructible<_Iter> && __cpp17_copy_assignable<_Iter> && __cpp17_destructible<_Iter> && - (is_signed_v<__iter_diff_t<_Iter>> || is_void_v<__iter_diff_t<_Iter>>) && requires(_Iter __iter) { + (is_signed_v<__iterator_difference_type<_Iter>> || is_void_v<__iterator_difference_type<_Iter>>) && + 
requires(_Iter __iter) { { *__iter }; { ++__iter } -> same_as<_Iter&>; }; @@ -81,8 +82,8 @@ concept __cpp17_input_iterator = { __lhs != std::as_const(__rhs) } -> __boolean_testable; { std::as_const(__lhs) != std::as_const(__rhs) } -> __boolean_testable; - { *__lhs } -> same_as<__iter_reference<_Iter>>; - { *std::as_const(__lhs) } -> same_as<__iter_reference<_Iter>>; + { *__lhs } -> same_as<__iterator_reference<_Iter>>; + { *std::as_const(__lhs) } -> same_as<__iterator_reference<_Iter>>; { ++__lhs } -> same_as<_Iter&>; { (void)__lhs++ }; @@ -101,19 +102,19 @@ template concept __cpp17_forward_iterator = __cpp17_input_iterator<_Iter> && __cpp17_default_constructible<_Iter> && requires(_Iter __iter) { { __iter++ } -> convertible_to; - { *__iter++ } -> same_as<__iter_reference<_Iter>>; + { *__iter++ } -> same_as<__iterator_reference<_Iter>>; }; template concept __cpp17_bidirectional_iterator = __cpp17_forward_iterator<_Iter> && requires(_Iter __iter) { { --__iter } -> same_as<_Iter&>; { __iter-- } -> convertible_to; - { *__iter-- } -> same_as<__iter_reference<_Iter>>; + { *__iter-- } -> same_as<__iterator_reference<_Iter>>; }; template concept __cpp17_random_access_iterator = - __cpp17_bidirectional_iterator<_Iter> && requires(_Iter __iter, __iter_diff_t<_Iter> __n) { + __cpp17_bidirectional_iterator<_Iter> && requires(_Iter __iter, __iterator_difference_type<_Iter> __n) { { __iter += __n } -> same_as<_Iter&>; { __iter + __n } -> same_as<_Iter>; @@ -125,13 +126,13 @@ concept __cpp17_random_access_iterator = { __iter - __n } -> same_as<_Iter>; { std::as_const(__iter) - __n } -> same_as<_Iter>; - { __iter - __iter } -> same_as<__iter_diff_t<_Iter>>; - { std::as_const(__iter) - __iter } -> same_as<__iter_diff_t<_Iter>>; - { __iter - std::as_const(__iter) } -> same_as<__iter_diff_t<_Iter>>; - { std::as_const(__iter) - std::as_const(__iter) } -> same_as<__iter_diff_t<_Iter>>; + { __iter - __iter } -> same_as<__iterator_difference_type<_Iter>>; + { std::as_const(__iter) - 
__iter } -> same_as<__iterator_difference_type<_Iter>>; + { __iter - std::as_const(__iter) } -> same_as<__iterator_difference_type<_Iter>>; + { std::as_const(__iter) - std::as_const(__iter) } -> same_as<__iterator_difference_type<_Iter>>; - { __iter[__n] } -> convertible_to<__iter_reference<_Iter>>; - { std::as_const(__iter)[__n] } -> convertible_to<__iter_reference<_Iter>>; + { __iter[__n] } -> convertible_to<__iterator_reference<_Iter>>; + { std::as_const(__iter)[__n] } -> convertible_to<__iterator_reference<_Iter>>; { __iter < __iter } -> __boolean_testable; { std::as_const(__iter) < __iter } -> __boolean_testable; diff --git a/lib/libcxx/include/__iterator/distance.h b/lib/libcxx/include/__iterator/distance.h index 1732aa527f..1a9fbf27f7 100644 --- a/lib/libcxx/include/__iterator/distance.h +++ b/lib/libcxx/include/__iterator/distance.h @@ -10,41 +10,66 @@ #ifndef _LIBCPP___ITERATOR_DISTANCE_H #define _LIBCPP___ITERATOR_DISTANCE_H +#include <__algorithm/for_each_segment.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> +#include <__iterator/segmented_iterator.h> #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/size.h> #include <__type_traits/decay.h> +#include <__type_traits/enable_if.h> #include <__type_traits/remove_cvref.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type -__distance(_InputIter __first, _InputIter __last, input_iterator_tag) { - typename iterator_traits<_InputIter>::difference_type __r(0); - for (; __first != __last; ++__first) - ++__r; - return __r; +#if _LIBCPP_STD_VER >= 20 +template +using __iter_distance_t _LIBCPP_NODEBUG = 
std::iter_difference_t<_Iter>; +#else +template +using __iter_distance_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type; +#endif + +template ::value, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_RandIter> +__distance(_RandIter __first, _RandIter __last) { + return __last - __first; } -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_RandIter>::difference_type -__distance(_RandIter __first, _RandIter __last, random_access_iterator_tag) { - return __last - __first; +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_InputIter> +__distance(_InputIter __first, _Sent __last) { + __iter_distance_t<_InputIter> __r(0); +#if _LIBCPP_STD_VER >= 20 + if constexpr (same_as<_InputIter, _Sent> && __is_segmented_iterator_v<_InputIter>) { + std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) { + __r += std::__distance(__lfirst, __llast); + }); + } else +#endif + { + for (; __first != __last; ++__first) + ++__r; + } + return __r; } template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type distance(_InputIter __first, _InputIter __last) { - return std::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category()); + return std::__distance(__first, __last); } #if _LIBCPP_STD_VER >= 20 @@ -56,12 +81,7 @@ struct __distance { template _Sp> requires(!sized_sentinel_for<_Sp, _Ip>) _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const { - iter_difference_t<_Ip> __n = 0; - while (__first != __last) { - ++__first; - ++__n; - } - return __n; + return std::__distance(std::move(__first), std::move(__last)); } template > _Sp> @@ -92,4 +112,6 @@ inline constexpr auto distance = __distance{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ITERATOR_DISTANCE_H diff --git 
a/lib/libcxx/include/__iterator/front_insert_iterator.h b/lib/libcxx/include/__iterator/front_insert_iterator.h index d79c4d78b6..2ab5383a1d 100644 --- a/lib/libcxx/include/__iterator/front_insert_iterator.h +++ b/lib/libcxx/include/__iterator/front_insert_iterator.h @@ -26,15 +26,9 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class front_insert_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, output_iterator_tag, void, void, void, void> { protected: _Container* container; diff --git a/lib/libcxx/include/__iterator/insert_iterator.h b/lib/libcxx/include/__iterator/insert_iterator.h index 95768cb8e0..6a5818b478 100644 --- a/lib/libcxx/include/__iterator/insert_iterator.h +++ b/lib/libcxx/include/__iterator/insert_iterator.h @@ -35,15 +35,9 @@ template using __insert_iterator_iter_t _LIBCPP_NODEBUG = typename _Container::iterator; #endif -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class insert_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, output_iterator_tag, void, void, void, void> { protected: _Container* container; __insert_iterator_iter_t<_Container> iter; diff --git a/lib/libcxx/include/__iterator/istream_iterator.h b/lib/libcxx/include/__iterator/istream_iterator.h index cdb8056cfe..f4b13f09c7 100644 --- a/lib/libcxx/include/__iterator/istream_iterator.h +++ b/lib/libcxx/include/__iterator/istream_iterator.h @@ -25,15 +25,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template , class _Distance = ptrdiff_t> class istream_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, + input_iterator_tag, + _Tp, + _Distance, + const 
_Tp*, + const _Tp&> { public: typedef input_iterator_tag iterator_category; typedef _Tp value_type; diff --git a/lib/libcxx/include/__iterator/istreambuf_iterator.h b/lib/libcxx/include/__iterator/istreambuf_iterator.h index b7b28cd1a0..4fc87a84f0 100644 --- a/lib/libcxx/include/__iterator/istreambuf_iterator.h +++ b/lib/libcxx/include/__iterator/istreambuf_iterator.h @@ -25,15 +25,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class istreambuf_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, + input_iterator_tag, + _CharT, + typename _Traits::off_type, + _CharT*, + _CharT> { public: typedef input_iterator_tag iterator_category; typedef _CharT value_type; diff --git a/lib/libcxx/include/__iterator/iter_move.h b/lib/libcxx/include/__iterator/iter_move.h index 5cc1615259..a726b6e329 100644 --- a/lib/libcxx/include/__iterator/iter_move.h +++ b/lib/libcxx/include/__iterator/iter_move.h @@ -40,7 +40,7 @@ void iter_move() = delete; template concept __unqualified_iter_move = __class_or_enum> && requires(_Tp&& __t) { - // NOLINTNEXTLINE(libcpp-robust-against-adl) iter_swap ADL calls should only be made through ranges::iter_swap + // NOLINTNEXTLINE(libcpp-robust-against-adl) iter_move ADL calls should only be made through ranges::iter_move iter_move(std::forward<_Tp>(__t)); }; diff --git a/lib/libcxx/include/__iterator/iterator.h b/lib/libcxx/include/__iterator/iterator.h index d7fcd8c4dd..c599f61797 100644 --- a/lib/libcxx/include/__iterator/iterator.h +++ b/lib/libcxx/include/__iterator/iterator.h @@ -28,6 +28,19 @@ struct _LIBCPP_DEPRECATED_IN_CXX17 iterator { typedef _Category iterator_category; }; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#ifdef _LIBCPP_ABI_NO_ITERATOR_BASES +template +struct __no_iterator_base {}; + +template +using __iterator_base _LIBCPP_NODEBUG = __no_iterator_base<_Derived>; +#else +template +using 
__iterator_base _LIBCPP_NODEBUG = iterator<_Category, _Tp, _Distance, _Pointer, _Reference>; +#endif +_LIBCPP_SUPPRESS_DEPRECATED_POP + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ITERATOR_ITERATOR_H diff --git a/lib/libcxx/include/__iterator/iterator_traits.h b/lib/libcxx/include/__iterator/iterator_traits.h index f727e8ff36..ebf315a53b 100644 --- a/lib/libcxx/include/__iterator/iterator_traits.h +++ b/lib/libcxx/include/__iterator/iterator_traits.h @@ -420,44 +420,43 @@ using __has_exactly_bidirectional_iterator_category _LIBCPP_NODEBUG = !__has_iterator_category_convertible_to<_Tp, random_access_iterator_tag>::value>; template -using __iter_value_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type; +using __iterator_value_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type; #if _LIBCPP_STD_VER >= 23 template -using __iter_key_type _LIBCPP_NODEBUG = remove_const_t>>; +using __iter_key_type _LIBCPP_NODEBUG = remove_const_t>>; template -using __iter_mapped_type _LIBCPP_NODEBUG = tuple_element_t<1, __iter_value_type<_InputIterator>>; +using __iter_mapped_type _LIBCPP_NODEBUG = tuple_element_t<1, __iterator_value_type<_InputIterator>>; template using __iter_to_alloc_type _LIBCPP_NODEBUG = - pair>, - tuple_element_t<1, __iter_value_type<_InputIterator>>>; + pair>, + tuple_element_t<1, __iterator_value_type<_InputIterator>>>; #else template -using __iter_key_type _LIBCPP_NODEBUG = - __remove_const_t::value_type::first_type>; +using __iter_key_type _LIBCPP_NODEBUG = __remove_const_t::first_type>; template -using __iter_mapped_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type::second_type; +using __iter_mapped_type _LIBCPP_NODEBUG = typename __iterator_value_type<_InputIterator>::second_type; template using __iter_to_alloc_type _LIBCPP_NODEBUG = - pair::value_type::first_type, - typename iterator_traits<_InputIterator>::value_type::second_type>; + pair::first_type, + typename 
__iterator_value_type<_InputIterator>::second_type>; #endif // _LIBCPP_STD_VER >= 23 template -using __iterator_category_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category; +using __iterator_iterator_category _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category; template -using __iterator_pointer_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::pointer; +using __iterator_pointer _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::pointer; template -using __iter_diff_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type; +using __iterator_difference_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type; template -using __iter_reference _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::reference; +using __iterator_reference _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::reference; #if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__iterator/ostream_iterator.h b/lib/libcxx/include/__iterator/ostream_iterator.h index 2b459f4628..64e79f010f 100644 --- a/lib/libcxx/include/__iterator/ostream_iterator.h +++ b/lib/libcxx/include/__iterator/ostream_iterator.h @@ -24,15 +24,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template > class ostream_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, output_iterator_tag, void, void, void, void> { public: typedef output_iterator_tag iterator_category; typedef void value_type; diff --git a/lib/libcxx/include/__iterator/ostreambuf_iterator.h b/lib/libcxx/include/__iterator/ostreambuf_iterator.h index 7133331a7b..4a3b2fa024 100644 --- a/lib/libcxx/include/__iterator/ostreambuf_iterator.h +++ b/lib/libcxx/include/__iterator/ostreambuf_iterator.h @@ -25,15 +25,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class ostreambuf_iterator -#if _LIBCPP_STD_VER <= 14 || 
!defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, output_iterator_tag, void, void, void, void> { public: typedef output_iterator_tag iterator_category; typedef void value_type; diff --git a/lib/libcxx/include/__iterator/reverse_iterator.h b/lib/libcxx/include/__iterator/reverse_iterator.h index 8935e5a8ff..834695dd16 100644 --- a/lib/libcxx/include/__iterator/reverse_iterator.h +++ b/lib/libcxx/include/__iterator/reverse_iterator.h @@ -46,21 +46,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class reverse_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator::iterator_category, - typename iterator_traits<_Iter>::value_type, - typename iterator_traits<_Iter>::difference_type, - typename iterator_traits<_Iter>::pointer, - typename iterator_traits<_Iter>::reference> -#endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, + typename iterator_traits<_Iter>::iterator_category, + typename iterator_traits<_Iter>::value_type, + typename iterator_traits<_Iter>::difference_type, + typename iterator_traits<_Iter>::pointer, + typename iterator_traits<_Iter>::reference> { private: -#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES +#ifndef _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER _Iter __t_; // no longer used as of LWG #2360, not removed due to ABI break #endif @@ -91,7 +86,7 @@ public: using reference = typename iterator_traits<_Iter>::reference; #endif -#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES +#ifndef _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator() : __t_(), current() {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 explicit reverse_iterator(_Iter __x) : __t_(__x), current(__x) {} diff --git a/lib/libcxx/include/__iterator/segmented_iterator.h b/lib/libcxx/include/__iterator/segmented_iterator.h index af27a7be41..dc56a74013 100644 --- 
a/lib/libcxx/include/__iterator/segmented_iterator.h +++ b/lib/libcxx/include/__iterator/segmented_iterator.h @@ -67,18 +67,13 @@ struct __segmented_iterator_traits; */ template -struct __has_specialization : false_type {}; +inline const bool __has_specialization_v = false; template -struct __has_specialization<_Tp, sizeof(_Tp) * 0> : true_type {}; +inline const bool __has_specialization_v<_Tp, sizeof(_Tp) * 0> = true; template -using __is_segmented_iterator _LIBCPP_NODEBUG = __has_specialization<__segmented_iterator_traits<_Iterator> >; - -template -struct __has_random_access_local_iterator - : __has_random_access_iterator_category< - typename __segmented_iterator_traits< _SegmentedIterator >::__local_iterator > {}; +inline const bool __is_segmented_iterator_v = __has_specialization_v<__segmented_iterator_traits<_Iterator> >; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__iterator/static_bounded_iter.h b/lib/libcxx/include/__iterator/static_bounded_iter.h index 8f4fbdf6df..d8fc7d185e 100644 --- a/lib/libcxx/include/__iterator/static_bounded_iter.h +++ b/lib/libcxx/include/__iterator/static_bounded_iter.h @@ -99,9 +99,9 @@ struct __static_bounded_iter { template , - _Or >, - is_same > > > >::value, + _And, + _Or >, + is_same > > > >::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __static_bounded_iter(__static_bounded_iter<_OtherIterator, _Size> const& __other) _NOEXCEPT diff --git a/lib/libcxx/include/__iterator/wrap_iter.h b/lib/libcxx/include/__iterator/wrap_iter.h index 2b5bc489dd..98745f600a 100644 --- a/lib/libcxx/include/__iterator/wrap_iter.h +++ b/lib/libcxx/include/__iterator/wrap_iter.h @@ -49,12 +49,12 @@ private: public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter() _NOEXCEPT : __i_() {} - template < - class _OtherIter, - __enable_if_t< _And< is_convertible, - _Or >, - is_same > > > >::value, - int> = 0> + template , + _Or >, + is_same > > > >::value, + int> = 0> _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter(const __wrap_iter<_OtherIter>& __u) _NOEXCEPT : __i_(__u.__i_) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT { return *__i_; } @@ -117,6 +117,8 @@ private: friend class span; template friend struct array; + template + friend struct __optional_iterator; }; template diff --git a/lib/libcxx/include/__locale b/lib/libcxx/include/__locale index 757a53951f..5b1787451f 100644 --- a/lib/libcxx/include/__locale +++ b/lib/libcxx/include/__locale @@ -57,9 +57,8 @@ _LIBCPP_HIDE_FROM_ABI const _Facet& use_facet(const locale&); class _LIBCPP_EXPORTED_FROM_ABI locale { public: // locale is essentially a shared_ptr that doesn't support weak_ptrs and never got a move constructor, - // so it is trivially relocatable. Like shared_ptr, it is also replaceable. + // so it is trivially relocatable. using __trivially_relocatable _LIBCPP_NODEBUG = locale; - using __replaceable _LIBCPP_NODEBUG = locale; // types: class _LIBCPP_EXPORTED_FROM_ABI facet; @@ -389,7 +388,7 @@ public: static const mask xdigit = _ISXDIGIT; static const mask blank = _ISBLANK; static const mask __regex_word = 0x8000; -# elif defined(_NEWLIB_VERSION) +# elif _LIBCPP_LIBC_NEWLIB // Same type as Newlib's _ctype_ array in newlib/libc/include/ctype.h. typedef char mask; // In case char is signed, static_cast is needed to avoid warning on @@ -585,7 +584,7 @@ public: # ifdef _CACHED_RUNES static const size_t table_size = _CACHED_RUNES; # else - static const size_t table_size = 256; // FIXME: Don't hardcode this. 
+ static const size_t table_size = 256; # endif _LIBCPP_HIDE_FROM_ABI const mask* table() const _NOEXCEPT { return __tab_; } static const mask* classic_table() _NOEXCEPT; @@ -1478,9 +1477,6 @@ public: protected: ~numpunct_byname() override; - -private: - void __init(const char*); }; # if _LIBCPP_HAS_WIDE_CHARACTERS @@ -1495,9 +1491,6 @@ public: protected: ~numpunct_byname() override; - -private: - void __init(const char*); }; # endif // _LIBCPP_HAS_WIDE_CHARACTERS diff --git a/lib/libcxx/include/__locale_dir/locale_base_api.h b/lib/libcxx/include/__locale_dir/locale_base_api.h index 5e6c69e95e..0474c1db35 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api.h @@ -57,15 +57,11 @@ // float __strtof(const char*, char**, __locale_t); // double __strtod(const char*, char**, __locale_t); // long double __strtold(const char*, char**, __locale_t); -// long long __strtoll(const char*, char**, __locale_t); -// unsigned long long __strtoull(const char*, char**, __locale_t); // } // // Character manipulation functions // -------------------------------- // namespace __locale { -// int __isdigit(int, __locale_t); // required by the headers -// int __isxdigit(int, __locale_t); // required by the headers // int __toupper(int, __locale_t); // int __tolower(int, __locale_t); // int __strcoll(const char*, const char*, __locale_t); @@ -106,7 +102,6 @@ // // int __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers // int __asprintf(char**, __locale_t, const char*, ...); // required by the headers -// int __sscanf(const char*, __locale_t, const char*, ...); // required by the headers // } #if _LIBCPP_HAS_LOCALIZATION @@ -115,7 +110,6 @@ # include <__locale_dir/support/apple.h> # elif defined(__FreeBSD__) # include <__locale_dir/support/freebsd.h> -/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */ # elif defined(__NetBSD__) # include <__locale_dir/support/netbsd.h> # elif 
defined(_LIBCPP_MSVCRT_LIKE) @@ -124,20 +118,20 @@ # include <__locale_dir/support/fuchsia.h> # elif defined(__linux__) # include <__locale_dir/support/linux.h> +# elif _LIBCPP_LIBC_NEWLIB +# include <__locale_dir/support/newlib.h> +# elif defined(_AIX) +# include <__locale_dir/support/aix.h> # else // TODO: This is a temporary definition to bridge between the old way we defined the locale base API // (by providing global non-reserved names) and the new API. As we move individual platforms // towards the new way of defining the locale base API, this should disappear since each platform // will define those directly. -# if defined(_AIX) || defined(__MVS__) +# if defined(__MVS__) # include <__locale_dir/locale_base_api/ibm.h> -# elif defined(__ANDROID__) -# include <__locale_dir/locale_base_api/android.h> # elif defined(__OpenBSD__) # include <__locale_dir/locale_base_api/openbsd.h> -# elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC -# include <__locale_dir/locale_base_api/musl.h> # endif # include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> @@ -197,21 +191,9 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return strtold_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return strtoll_l(__nptr, __endptr, __base, __loc); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return strtoull_l(__nptr, __endptr, __base, __loc); -} - // // Character manipulation functions // -inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); } -inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); } - # if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { return strcoll_l(__s1, 
__s2, __loc); @@ -307,11 +289,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __ char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { return std::__libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); } -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) { - return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); -} _LIBCPP_DIAGNOSTIC_POP # undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index b62a1b737e..8cdbe0cd15 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -125,16 +125,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l( return __res; } -inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( - const char* __s, locale_t __l, const char* __format, ...) { - va_list __va; - va_start(__va, __format); - __locale_guard __current(__l); - int __res = vsscanf(__s, __format, __va); - va_end(__va); - return __res; -} - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H diff --git a/lib/libcxx/include/__locale_dir/messages.h b/lib/libcxx/include/__locale_dir/messages.h index c04bf04025..686f472840 100644 --- a/lib/libcxx/include/__locale_dir/messages.h +++ b/lib/libcxx/include/__locale_dir/messages.h @@ -22,7 +22,7 @@ # if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) // Most unix variants have catopen. These are the specific ones that don't. 
-# if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) && !defined(__EMSCRIPTEN__) +# if !defined(__BIONIC__) && !_LIBCPP_LIBC_NEWLIB && !defined(__EMSCRIPTEN__) # define _LIBCPP_HAS_CATOPEN 1 # include # else diff --git a/lib/libcxx/include/__locale_dir/money.h b/lib/libcxx/include/__locale_dir/money.h index c129666550..12ba38467d 100644 --- a/lib/libcxx/include/__locale_dir/money.h +++ b/lib/libcxx/include/__locale_dir/money.h @@ -433,7 +433,7 @@ bool money_get<_CharT, _InputIterator>::__do_get( __err |= ios_base::failbit; return false; } - for (++__b; __fd > 0; --__fd, ++__b) { + for (++__b; __fd > 0; --__fd, (void)++__b) { if (__b == __e || !__ct.is(ctype_base::digit, *__b)) { __err |= ios_base::failbit; return false; @@ -451,7 +451,7 @@ bool money_get<_CharT, _InputIterator>::__do_get( } } if (__trailing_sign) { - for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, ++__b) { + for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, (void)++__b) { if (__b == __e || *__b != (*__trailing_sign)[__i]) { __err |= ios_base::failbit; return false; diff --git a/lib/libcxx/include/__locale_dir/num.h b/lib/libcxx/include/__locale_dir/num.h index 7ca8ffe348..b7ea02e7cb 100644 --- a/lib/libcxx/include/__locale_dir/num.h +++ b/lib/libcxx/include/__locale_dir/num.h @@ -9,8 +9,10 @@ #ifndef _LIBCPP___LOCALE_DIR_NUM_H #define _LIBCPP___LOCALE_DIR_NUM_H +#include <__algorithm/copy.h> #include <__algorithm/find.h> #include <__algorithm/reverse.h> +#include <__algorithm/simd_utils.h> #include <__charconv/to_chars_integral.h> #include <__charconv/traits.h> #include <__config> @@ -22,6 +24,7 @@ #include <__locale_dir/scan_keyword.h> #include <__memory/unique_ptr.h> #include <__system_error/errc.h> +#include <__type_traits/is_signed.h> #include #include #include @@ -46,9 +49,9 @@ struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base { static int __get_base(ios_base&); static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN" // count of leading characters in __src 
used for parsing integers ("012..X+-") - static const size_t __int_chr_cnt = 26; + static inline const size_t __int_chr_cnt = 26; // count of leading characters in __src used for parsing floating-point values ("012..-pP") - static const size_t __fp_chr_cnt = 28; + static inline const size_t __fp_chr_cnt = 28; }; template @@ -71,7 +74,8 @@ struct __num_get : protected __num_get_base { [[__deprecated__("This exists only for ABI compatibility")]] static string __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep); - static int __stage2_int_loop( + + [[__deprecated__("This exists only for ABI compatibility")]] static int __stage2_int_loop( _CharT __ct, int __base, char* __a, @@ -83,11 +87,24 @@ struct __num_get : protected __num_get_base { unsigned*& __g_end, _CharT* __atoms); - _LIBCPP_HIDE_FROM_ABI static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep) { - locale __loc = __iob.getloc(); - const numpunct<_CharT>& __np = use_facet >(__loc); - __thousands_sep = __np.thousands_sep(); - return __np.grouping(); + _LIBCPP_HIDE_FROM_ABI static ptrdiff_t __atoms_offset(const _CharT* __atoms, _CharT __val) { + // TODO: Remove the manual vectorization once https://llvm.org/PR168551 is resolved +# if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS + if constexpr (is_same<_CharT, char>::value) { + // TODO(LLVM 24): This can be removed, since -Wpsabi doesn't warn on [[gnu::always_inline]] functions anymore. 
+ _LIBCPP_DIAGNOSTIC_PUSH + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi") + using __vec = __simd_vector; + __vec __chars = std::__broadcast<__vec>(__val); + __vec __cmp = std::__partial_load<__vec, __int_chr_cnt>(__atoms); + auto __res = __chars == __cmp; + if (std::__none_of(__res)) + return __int_chr_cnt; + return std::min(__int_chr_cnt, std::__find_first_set(__res)); + _LIBCPP_DIAGNOSTIC_POP + } +# endif + return std::find(__atoms, __atoms + __int_chr_cnt, __val) - __atoms; } _LIBCPP_HIDE_FROM_ABI const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const { @@ -120,54 +137,6 @@ string __num_get<_CharT>::__stage2_float_prep( return __np.grouping(); } -template -int __num_get<_CharT>::__stage2_int_loop( - _CharT __ct, - int __base, - char* __a, - char*& __a_end, - unsigned& __dc, - _CharT __thousands_sep, - const string& __grouping, - unsigned* __g, - unsigned*& __g_end, - _CharT* __atoms) { - if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) { - *__a_end++ = __ct == __atoms[24] ? 
'+' : '-'; - __dc = 0; - return 0; - } - if (__grouping.size() != 0 && __ct == __thousands_sep) { - if (__g_end - __g < __num_get_buf_sz) { - *__g_end++ = __dc; - __dc = 0; - } - return 0; - } - ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms; - if (__f >= 24) - return -1; - switch (__base) { - case 8: - case 10: - if (__f >= __base) - return -1; - break; - case 16: - if (__f < 22) - break; - if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') { - __dc = 0; - *__a_end++ = __src[__f]; - return 0; - } - return -1; - } - *__a_end++ = __src[__f]; - ++__dc; - return 0; -} - template int __num_get<_CharT>::__stage2_float_loop( _CharT __ct, @@ -272,65 +241,6 @@ _LIBCPP_HIDE_FROM_ABI _Tp __num_get_float(const char* __a, const char* __a_end, return 0; } -template -_LIBCPP_HIDE_FROM_ABI _Tp -__num_get_signed_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) { - if (__a != __a_end) { - __libcpp_remove_reference_t __save_errno = errno; - errno = 0; - char* __p2; - long long __ll = __locale::__strtoll(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE); - __libcpp_remove_reference_t __current_errno = errno; - if (__current_errno == 0) - errno = __save_errno; - if (__p2 != __a_end) { - __err = ios_base::failbit; - return 0; - } else if (__current_errno == ERANGE || __ll < numeric_limits<_Tp>::min() || numeric_limits<_Tp>::max() < __ll) { - __err = ios_base::failbit; - if (__ll > 0) - return numeric_limits<_Tp>::max(); - else - return numeric_limits<_Tp>::min(); - } - return static_cast<_Tp>(__ll); - } - __err = ios_base::failbit; - return 0; -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__num_get_unsigned_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) { - if (__a != __a_end) { - const bool __negate = *__a == '-'; - if (__negate && ++__a == __a_end) { - __err = ios_base::failbit; - return 0; - } - __libcpp_remove_reference_t __save_errno = errno; - errno = 0; - char* __p2; - 
unsigned long long __ll = __locale::__strtoull(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE); - __libcpp_remove_reference_t __current_errno = errno; - if (__current_errno == 0) - errno = __save_errno; - if (__p2 != __a_end) { - __err = ios_base::failbit; - return 0; - } else if (__current_errno == ERANGE || numeric_limits<_Tp>::max() < __ll) { - __err = ios_base::failbit; - return numeric_limits<_Tp>::max(); - } - _Tp __res = static_cast<_Tp>(__ll); - if (__negate) - __res = -__res; - return __res; - } - __err = ios_base::failbit; - return 0; -} - template > class num_get : public locale::facet, private __num_get<_CharT> { public: @@ -468,137 +378,196 @@ protected: return __b; } - template - _LIBCPP_HIDE_FROM_ABI iter_type - __do_get_signed(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const { - // Stage 1 - int __base = this->__get_base(__iob); - // Stage 2 - char_type __thousands_sep; - const int __atoms_size = __num_get_base::__int_chr_cnt; - char_type __atoms1[__atoms_size]; - const char_type* __atoms = this->__do_widen(__iob, __atoms1); - string __grouping = this->__stage2_int_prep(__iob, __thousands_sep); - string __buf; - __buf.resize(__buf.capacity()); - char* __a = &__buf[0]; - char* __a_end = __a; - unsigned __g[__num_get_base::__num_get_buf_sz]; - unsigned* __g_end = __g; - unsigned __dc = 0; - for (; __b != __e; ++__b) { - if (__a_end == __a + __buf.size()) { - size_t __tmp = __buf.size(); - __buf.resize(2 * __buf.size()); - __buf.resize(__buf.capacity()); - __a = &__buf[0]; - __a_end = __a + __tmp; - } - if (this->__stage2_int_loop( - *__b, - __base, - __a, - __a_end, - __dc, - __thousands_sep, - __grouping, - __g, - __g_end, - const_cast(__atoms))) - break; - } - if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz) - *__g_end++ = __dc; - // Stage 3 - __v = std::__num_get_signed_integral<_Signed>(__a, __a_end, __err, __base); - // Digit grouping checked - __check_grouping(__grouping, __g, 
__g_end, __err); - // EOF checked - if (__b == __e) - __err |= ios_base::eofbit; - return __b; - } + template + iter_type __do_get_integral( + iter_type __first, iter_type __last, ios_base& __iob, ios_base::iostate& __err, _MaybeSigned& __v) const { + using _Unsigned = __make_unsigned_t<_MaybeSigned>; - template - _LIBCPP_HIDE_FROM_ABI iter_type - __do_get_unsigned(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const { // Stage 1 int __base = this->__get_base(__iob); - // Stage 2 - char_type __thousands_sep; - const int __atoms_size = __num_get_base::__int_chr_cnt; - char_type __atoms1[__atoms_size]; - const char_type* __atoms = this->__do_widen(__iob, __atoms1); - string __grouping = this->__stage2_int_prep(__iob, __thousands_sep); - string __buf; - __buf.resize(__buf.capacity()); - char* __a = &__buf[0]; - char* __a_end = __a; + + // Stages 2 & 3 + // These are combined into a single step where we parse the characters and calculate the value in one go instead of + // storing the relevant characters first (in an allocated buffer) and parse the characters after we extracted them. + // This makes the whole process significantly faster, since we avoid potential allocations and copies. 
+ + const auto& __numpunct = use_facet >(__iob.getloc()); + char_type __thousands_sep = __numpunct.thousands_sep(); + string __grouping = __numpunct.grouping(); + + char_type __atoms_buffer[__num_get_base::__int_chr_cnt]; + const char_type* __atoms = this->__do_widen(__iob, __atoms_buffer); unsigned __g[__num_get_base::__num_get_buf_sz]; unsigned* __g_end = __g; unsigned __dc = 0; - for (; __b != __e; ++__b) { - if (__a_end == __a + __buf.size()) { - size_t __tmp = __buf.size(); - __buf.resize(2 * __buf.size()); - __buf.resize(__buf.capacity()); - __a = &__buf[0]; - __a_end = __a + __tmp; - } - if (this->__stage2_int_loop( - *__b, - __base, - __a, - __a_end, - __dc, - __thousands_sep, - __grouping, - __g, - __g_end, - const_cast(__atoms))) - break; + + if (__first == __last) { + __err |= ios_base::eofbit | ios_base::failbit; + __v = 0; + return __first; } + + while (!__grouping.empty() && *__first == __thousands_sep) { + ++__first; + if (__g_end - __g < this->__num_get_buf_sz) + *__g_end++ = 0; + } + + bool __negate = false; + // __c == '+' || __c == '-' + if (auto __c = *__first; __c == __atoms[24] || __c == __atoms[25]) { + __negate = __c == __atoms[25]; + ++__first; + } + + if (__first == __last) { + __err |= ios_base::eofbit | ios_base::failbit; + __v = 0; + return __first; + } + + bool __parsed_num = false; + + // If we don't have a pre-set base, figure it out and swallow any prefix + if (__base == 0) { + auto __c = *__first; + // __c == '0' + if (__c == __atoms[0]) { + ++__first; + if (__first == __last) { + __err |= ios_base::eofbit; + __v = 0; + return __first; + } + // __c2 == 'x' || __c2 == 'X' + if (auto __c2 = *__first; __c2 == __atoms[22] || __c2 == __atoms[23]) { + __base = 16; + ++__first; + } else { + __base = 8; + __parsed_num = true; // We only swallowed '0', so we've started to parse a number + } + } else { + __base = 10; + } + + // If the base has been specified explicitly, try to swallow the appropriate prefix. 
We only need to do something + // special for hex, since decimal has no prefix and octal's prefix is '0', which doesn't change the value that + // we'll parse if we don't swallow it. + } else if (__base == 16) { + // Try to swallow '0x' + + // *__first == '0' + if (*__first == __atoms[0]) { + ++__first; + if (__first == __last) { + __err |= ios_base::eofbit; + __v = 0; + return __first; + } + // __c == 'x' || __c == 'X' + if (auto __c = *__first; __c == __atoms[22] || __c == __atoms[23]) + ++__first; + else + __parsed_num = true; // We only swallowed '0', so we've started to parse a number + } + } + + // Calculate the actual number + _Unsigned __val = 0; + bool __overflowed = false; + for (; __first != __last; ++__first) { + auto __c = *__first; + if (!__grouping.empty() && __c == __thousands_sep) { + if (__g_end - __g < this->__num_get_buf_sz) { + *__g_end++ = __dc; + __dc = 0; + } + continue; + } + auto __offset = this->__atoms_offset(__atoms, __c); + if (__offset >= 22) // Not a valid integer character + break; + + if (__base == 16 && __offset >= 16) + __offset -= 6; + if (__offset >= __base) + break; + // __val = (__val * __base) + __offset + __overflowed |= __builtin_mul_overflow(__val, __base, std::addressof(__val)) || + __builtin_add_overflow(__val, __offset, std::addressof(__val)); + __parsed_num = true; + ++__dc; + } + + if (!__parsed_num) { + __err |= ios_base::failbit; + __v = 0; + } else if (__overflowed) { + __err |= ios_base::failbit; + __v = is_signed<_MaybeSigned>::value && __negate + ? 
numeric_limits<_MaybeSigned>::min() + : numeric_limits<_MaybeSigned>::max(); + } else if (!__negate) { + if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max())) { + __err |= ios_base::failbit; + __v = numeric_limits<_MaybeSigned>::max(); + } else { + __v = __val; + } + } else if (is_signed<_MaybeSigned>::value) { + if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) { + __err |= ios_base::failbit; + __v = numeric_limits<_MaybeSigned>::min(); + } else if (__val == static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) { + __v = numeric_limits<_MaybeSigned>::min(); + } else { + __v = -__val; + } + } else { + __v = -__val; + } + if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz) *__g_end++ = __dc; - // Stage 3 - __v = std::__num_get_unsigned_integral<_Unsigned>(__a, __a_end, __err, __base); + // Digit grouping checked __check_grouping(__grouping, __g, __g_end, __err); // EOF checked - if (__b == __e) + if (__first == __last) __err |= ios_base::eofbit; - return __b; + return __first; } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const; virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const { - return this->__do_get_signed(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const { - return this->__do_get_signed(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, 
ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const { - return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + return this->__do_get_integral(__b, __e, __iob, __err, __v); } virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const { @@ -652,40 +621,13 @@ _InputIterator num_get<_CharT, _InputIterator>::do_get( template _InputIterator num_get<_CharT, _InputIterator>::do_get( iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const { - // Stage 1 - int __base = 16; - // Stage 2 - char_type __atoms[__num_get_base::__int_chr_cnt]; - char_type __thousands_sep = char_type(); - string __grouping; - std::use_facet >(__iob.getloc()) - .widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms); - string __buf; - __buf.resize(__buf.capacity()); - char* __a = &__buf[0]; - char* __a_end = __a; - unsigned __g[__num_get_base::__num_get_buf_sz]; - unsigned* __g_end = __g; - unsigned __dc = 0; - for (; __b != __e; ++__b) { - if (__a_end == __a + __buf.size()) { - size_t __tmp = __buf.size(); - __buf.resize(2 * __buf.size()); - __buf.resize(__buf.capacity()); - __a = &__buf[0]; - __a_end = __a + __tmp; - } - if (this->__stage2_int_loop(*__b, __base, __a, __a_end, __dc, __thousands_sep, __grouping, __g, __g_end, __atoms)) - break; - } - // Stage 3 - __buf.resize(__a_end - __a); - if (__locale::__sscanf(__buf.c_str(), 
_LIBCPP_GET_C_LOCALE, "%p", &__v) != 1) - __err = ios_base::failbit; - // EOF checked - if (__b == __e) - __err |= ios_base::eofbit; - return __b; + auto __flags = __iob.flags(); + __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex); + uintptr_t __ptr; + auto __res = __do_get_integral(__b, __e, __iob, __err, __ptr); + __iob.flags(__flags); + __v = reinterpret_cast(__ptr); + return __res; } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get; @@ -748,6 +690,13 @@ void __num_put<_CharT>::__widen_and_group_int( __op = __ob + (__np - __nb); } +_LIBCPP_HIDE_FROM_ABI inline bool __isdigit(char __c) { return __c >= '0' && __c <= '9'; } + +_LIBCPP_HIDE_FROM_ABI inline bool __isxdigit(char __c) { + auto __lower = __c | 0x20; + return std::__isdigit(__c) || (__lower >= 'a' && __lower <= 'f'); +} + template void __num_put<_CharT>::__widen_and_group_float( char* __nb, char* __np, char* __ne, _CharT* __ob, _CharT*& __op, _CharT*& __oe, const locale& __loc) { @@ -763,11 +712,11 @@ void __num_put<_CharT>::__widen_and_group_float( *__oe++ = __ct.widen(*__nf++); *__oe++ = __ct.widen(*__nf++); for (__ns = __nf; __ns < __ne; ++__ns) - if (!__locale::__isxdigit(*__ns, _LIBCPP_GET_C_LOCALE)) + if (!std::__isxdigit(*__ns)) break; } else { for (__ns = __nf; __ns < __ne; ++__ns) - if (!__locale::__isdigit(*__ns, _LIBCPP_GET_C_LOCALE)) + if (!std::__isdigit(*__ns)) break; } if (__grouping.empty()) { @@ -885,9 +834,7 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty const numpunct& __np = std::use_facet >(__iob.getloc()); typedef typename numpunct::string_type string_type; string_type __nm = __v ? 
__np.truename() : __np.falsename(); - for (typename string_type::iterator __i = __nm.begin(); __i != __nm.end(); ++__i, ++__s) - *__s = *__i; - return __s; + return std::copy(__nm.begin(), __nm.end(), __s); } template diff --git a/lib/libcxx/include/__locale_dir/pad_and_output.h b/lib/libcxx/include/__locale_dir/pad_and_output.h index a1cb37d078..bdd4d2856d 100644 --- a/lib/libcxx/include/__locale_dir/pad_and_output.h +++ b/lib/libcxx/include/__locale_dir/pad_and_output.h @@ -13,6 +13,8 @@ #if _LIBCPP_HAS_LOCALIZATION +# include <__algorithm/copy.h> +# include <__algorithm/fill_n.h> # include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -30,12 +32,9 @@ _LIBCPP_HIDE_FROM_ABI _OutputIterator __pad_and_output( __ns -= __sz; else __ns = 0; - for (; __ob < __op; ++__ob, ++__s) - *__s = *__ob; - for (; __ns; --__ns, ++__s) - *__s = __fl; - for (; __ob < __oe; ++__ob, ++__s) - *__s = *__ob; + __s = std::copy(__ob, __op, __s); + __s = std::fill_n(__s, __ns, __fl); + __s = std::copy(__op, __oe, __s); __iob.width(0); return __s; } diff --git a/lib/libcxx/include/__locale_dir/support/bsd_like.h b/lib/libcxx/include/__locale_dir/support/bsd_like.h index 2b03e18920..6f533b4e1e 100644 --- a/lib/libcxx/include/__locale_dir/support/bsd_like.h +++ b/lib/libcxx/include/__locale_dir/support/bsd_like.h @@ -24,7 +24,6 @@ # include #endif -/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */ #if __has_include() # include #endif @@ -80,22 +79,9 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return ::strtold_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::strtoll_l(__nptr, __endptr, __base, __loc); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::strtoull_l(__nptr, __endptr, __base, __loc); -} - // // Character manipulation 
functions // -inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); } - -inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); } - #if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::toupper_l(__c, __loc); } @@ -216,12 +202,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __ char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard } - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) { - return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); -} _LIBCPP_DIAGNOSTIC_POP #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT diff --git a/lib/libcxx/include/__locale_dir/support/fuchsia.h b/lib/libcxx/include/__locale_dir/support/fuchsia.h index 4b9e63facb..528bfeb0cb 100644 --- a/lib/libcxx/include/__locale_dir/support/fuchsia.h +++ b/lib/libcxx/include/__locale_dir/support/fuchsia.h @@ -141,13 +141,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __ __locale_guard __current(__loc); return ::asprintf(__s, __format, std::forward<_Args>(__args)...); // non-standard } -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, _Args&&... 
__args) { - __locale_guard __current(__loc); - return std::sscanf(__s, __format, std::forward<_Args>(__args)...); -} - _LIBCPP_DIAGNOSTIC_POP #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT diff --git a/lib/libcxx/include/__locale_dir/support/linux.h b/lib/libcxx/include/__locale_dir/support/linux.h index 23bcf44c31..1a589be49b 100644 --- a/lib/libcxx/include/__locale_dir/support/linux.h +++ b/lib/libcxx/include/__locale_dir/support/linux.h @@ -94,32 +94,9 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return ::strtold_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { -#if !_LIBCPP_HAS_MUSL_LIBC - return ::strtoll_l(__nptr, __endptr, __base, __loc); -#else - (void)__loc; - return ::strtoll(__nptr, __endptr, __base); -#endif -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { -#if !_LIBCPP_HAS_MUSL_LIBC - return ::strtoull_l(__nptr, __endptr, __base, __loc); -#else - (void)__loc; - return ::strtoull(__nptr, __endptr, __base); -#endif -} - // // Character manipulation functions // -inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return isdigit_l(__c, __loc); } - -inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return isxdigit_l(__c, __loc); } - #if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return toupper_l(__c, __loc); } @@ -261,20 +238,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( va_end(__va); return __res; } - -#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs -_LIBCPP_HIDE_FROM_ABI -#endif -inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __s, __locale_t __loc, const char* __format, ...) 
{ - va_list __va; - va_start(__va, __format); - __locale_guard __current(__loc); - int __res = std::vsscanf(__s, __format, __va); - va_end(__va); - return __res; -} - } // namespace __locale _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__locale_dir/support/netbsd.h b/lib/libcxx/include/__locale_dir/support/netbsd.h index b1e67ade55..190857f6f8 100644 --- a/lib/libcxx/include/__locale_dir/support/netbsd.h +++ b/lib/libcxx/include/__locale_dir/support/netbsd.h @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */ - #ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NETBSD_H #define _LIBCPP___LOCALE_DIR_SUPPORT_NETBSD_H diff --git a/lib/libcxx/include/__locale_dir/support/newlib.h b/lib/libcxx/include/__locale_dir/support/newlib.h new file mode 100644 index 0000000000..05c8a449cf --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/newlib.h @@ -0,0 +1,243 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NEWLIB_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_NEWLIB_H + +#include <__config> +#include <__cstddef/size_t.h> +#include <__std_mbstate_t.h> +#include // std::lconv +#include +#include +#include +#include +#include +#include +#if _LIBCPP_HAS_WIDE_CHARACTERS +# include +# include +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(::uselocale(__loc)) {} + + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + if (__old_loc_) + ::uselocale(__old_loc_); + } + + locale_t __old_loc_; + + __locale_guard(__locale_guard const&) = delete; + __locale_guard& operator=(__locale_guard const&) = delete; +}; + +// +// Locale management +// +#define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +#define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +#define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +#define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +#define _LIBCPP_TIME_MASK LC_TIME_MASK +#define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +#define _LIBCPP_ALL_MASK LC_ALL_MASK +#define _LIBCPP_LC_ALL LC_ALL + +using __locale_t _LIBCPP_NODEBUG = ::locale_t; + +#if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t _LIBCPP_NODEBUG = std::lconv; + +inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __locale, __locale_t __base) { + return ::newlocale(__category_mask, __locale, __base); +} + +inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::freelocale(__loc); } + +inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __locale) { + return ::setlocale(__category, __locale); +} + +inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { + __locale_guard 
__current(__loc); + return std::localeconv(); +} +#endif // _LIBCPP_BUILDING_LIBRARY + +// +// Strtonum functions +// +inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtof_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtod_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtold_l(__nptr, __endptr, __loc); +} + +// +// Character manipulation functions +// +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return toupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return tolower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { + return strcoll_l(__s1, __s2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t __loc) { + return strxfrm_l(__dest, __src, __n, __loc); +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) { + return iswctype_l(__c, __type, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t __loc) { return iswspace_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, __locale_t __loc) { return iswprint_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t __loc) { return iswcntrl_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t __loc) { return iswupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t __loc) { return iswlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t 
__loc) { return iswalpha_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t __loc) { return iswblank_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t __loc) { return iswdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t __loc) { return iswpunct_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t __loc) { return iswxdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t __loc) { return towupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t __loc) { return towlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* __ws2, __locale_t __loc) { + return wcscoll_l(__ws1, __ws2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { + return wcsxfrm_l(__dest, __src, __n, __loc); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + +inline _LIBCPP_HIDE_FROM_ABI +size_t __strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t __loc) { + return strftime_l(__s, __max, __format, __tm, __loc); +} + +// +// Other functions +// +inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) { + __locale_guard __current(__loc); + return MB_CUR_MAX; +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __c, __locale_t __loc) { + __locale_guard __current(__loc); + return std::btowc(__c); +} + +inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __c, __locale_t __loc) { + __locale_guard __current(__loc); + return std::wctob(__c); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::wcsnrtombs(__dest, __src, 
__nwc, __len, __ps); // non-standard +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __wc, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::wcrtomb(__s, __wc, __ps); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::mbsnrtowcs(__dest, __src, __nms, __len, __ps); // non-standard +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbrtowc(__pwc, __s, __n, __ps); +} + +inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbtowc(__pwc, __pmb, __max); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbrlen(__s, __n, __ps); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbsrtowcs(__dest, __src, __len, __ps); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // _LIBCPP_BUILDING_LIBRARY + +#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs +_LIBCPP_HIDE_FROM_ABI +#endif +inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( + char* __s, size_t __n, __locale_t __loc, const char* __format, ...) 
{ + va_list __va; + va_start(__va, __format); + __locale_guard __current(__loc); + int __res = std::vsnprintf(__s, __n, __format, __va); + va_end(__va); + return __res; +} + +#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs +_LIBCPP_HIDE_FROM_ABI +#endif +inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( + char** __s, __locale_t __loc, const char* __format, ...) { + va_list __va; + va_start(__va, __format); + __locale_guard __current(__loc); + int __res = ::vasprintf(__s, __format, __va); // non-standard + va_end(__va); + return __res; +} +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_NEWLIB_H diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h index 1281b8bd13..73eba3ec54 100644 --- a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h +++ b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h @@ -29,10 +29,6 @@ namespace __locale { // // Character manipulation functions // -inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); } - #if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t) { return std::toupper(__c); } diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h b/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h index 0e7a32993e..59544e10e4 100644 --- a/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h +++ b/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h @@ -34,15 +34,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __ return std::strtold(__nptr, __endptr); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, 
__locale_t) { - return std::strtoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t) { - return std::strtoull(__nptr, __endptr, __base); -} - } // namespace __locale _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__locale_dir/support/windows.h b/lib/libcxx/include/__locale_dir/support/windows.h index 0df8709f11..644ef68adf 100644 --- a/lib/libcxx/include/__locale_dir/support/windows.h +++ b/lib/libcxx/include/__locale_dir/support/windows.h @@ -186,21 +186,9 @@ inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr return ::_strtod_l(__nptr, __endptr, __loc); } -inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::_strtoi64_l(__nptr, __endptr, __base, __loc); -} -inline _LIBCPP_HIDE_FROM_ABI unsigned long long -__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { - return ::_strtoui64_l(__nptr, __endptr, __base, __loc); -} - // // Character manipulation functions // -inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); } - -inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); } - #if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::_toupper_l(__c, __loc); } @@ -280,23 +268,6 @@ _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snpri _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(char** __ret, __locale_t __loc, const char* __format, ...); -_LIBCPP_DIAGNOSTIC_PUSH -_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") -_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates -#ifdef _LIBCPP_COMPILER_CLANG_BASED -# define 
_LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) -#else -# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ -#endif - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( - const char* __dest, __locale_t __loc, const char* __format, _Args&&... __args) { - return ::_sscanf_l(__dest, __format, __loc, std::forward<_Args>(__args)...); -} -_LIBCPP_DIAGNOSTIC_POP -#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT - #if defined(_LIBCPP_BUILDING_LIBRARY) struct __locale_guard { _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { diff --git a/lib/libcxx/include/__locale_dir/time.h b/lib/libcxx/include/__locale_dir/time.h index 5f60d5f36b..78698e9651 100644 --- a/lib/libcxx/include/__locale_dir/time.h +++ b/lib/libcxx/include/__locale_dir/time.h @@ -601,17 +601,13 @@ private: template <> \ _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&); \ template <> \ - _LIBCPP_EXPORTED_FROM_ABI void __time_get_storage<_CharT>::init(const ctype<_CharT>&); \ + void __time_get_storage<_CharT>::init(const ctype<_CharT>&); \ template <> \ - _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::string_type __time_get_storage<_CharT>::__analyze( \ - char, const ctype<_CharT>&); \ + __time_get_storage<_CharT>::string_type __time_get_storage<_CharT>::__analyze(char, const ctype<_CharT>&); \ extern template _LIBCPP_EXPORTED_FROM_ABI time_base::dateorder __time_get_storage<_CharT>::__do_date_order() \ const; \ extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const char*); \ - extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&); \ - extern template _LIBCPP_EXPORTED_FROM_ABI void __time_get_storage<_CharT>::init(const ctype<_CharT>&); \ - extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::string_type \ - 
__time_get_storage<_CharT>::__analyze(char, const ctype<_CharT>&); + extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&); _LIBCPP_TIME_GET_STORAGE_EXPLICIT_INSTANTIATION(char) # if _LIBCPP_HAS_WIDE_CHARACTERS diff --git a/lib/libcxx/include/__math/hypot.h b/lib/libcxx/include/__math/hypot.h index 8e8c35b4a4..2b12d7be21 100644 --- a/lib/libcxx/include/__math/hypot.h +++ b/lib/libcxx/include/__math/hypot.h @@ -53,7 +53,7 @@ inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> hypot(_A1 __x, _A2 __y) _NOEX // Computes the three-dimensional hypotenuse: `std::hypot(x,y,z)`. // The naive implementation might over-/underflow which is why this implementation is more involved: // If the square of an argument might run into issues, we scale the arguments appropriately. -// See https://github.com/llvm/llvm-project/issues/92782 for a detailed discussion and summary. +// See https://llvm.org/PR92782 for a detailed discussion and summary. template _LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) { // Factors needed to determine if over-/underflow might happen diff --git a/lib/libcxx/include/__math/logarithms.h b/lib/libcxx/include/__math/logarithms.h index 5f5f943977..7343d6a84a 100644 --- a/lib/libcxx/include/__math/logarithms.h +++ b/lib/libcxx/include/__math/logarithms.h @@ -58,7 +58,7 @@ inline _LIBCPP_HIDE_FROM_ABI double log10(_A1 __x) _NOEXCEPT { inline _LIBCPP_HIDE_FROM_ABI int ilogb(float __x) _NOEXCEPT { return __builtin_ilogbf(__x); } template -_LIBCPP_HIDE_FROM_ABI double ilogb(double __x) _NOEXCEPT { +_LIBCPP_HIDE_FROM_ABI int ilogb(double __x) _NOEXCEPT { return __builtin_ilogb(__x); } diff --git a/lib/libcxx/include/__math/traits.h b/lib/libcxx/include/__math/traits.h index 4a6e58c6da..ff22cee730 100644 --- a/lib/libcxx/include/__math/traits.h +++ b/lib/libcxx/include/__math/traits.h @@ -25,33 +25,26 @@ namespace __math { // signbit -// TODO(LLVM 22): Remove conditional once support for Clang 19 
is dropped. -#if defined(_LIBCPP_COMPILER_GCC) || __has_constexpr_builtin(__builtin_signbit) -# define _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_CONSTEXPR_SINCE_CXX23 -#else -# define _LIBCPP_SIGNBIT_CONSTEXPR -#endif - // The universal C runtime (UCRT) in the WinSDK provides floating point overloads // for std::signbit(). By defining our overloads as templates, we can work around // this issue as templates are less preferred than non-template functions. template -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT { return __builtin_signbit(__x); } template -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT { return __builtin_signbit(__x); } template -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT { return __builtin_signbit(__x); } template ::value, int> = 0> -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { return __x < 0; } @@ -189,6 +182,82 @@ template ::value && is_ar return __builtin_isunordered((type)__x, (type)__y); } +// MS UCRT incorrectly defines some functions in a way not working with integer types. Until C++20, this was worked +// around by -fdelayed-template-parsing. Since C++20, we can use standard feature "requires" instead. + +// TODO: Remove the workaround once UCRT fixes these functions. 
Note that this doesn't seem planned as of 2025-07 per +// https://developercommunity.visualstudio.com/t/10294165. + +#if defined(_LIBCPP_MSVCRT) && _LIBCPP_STD_VER >= 20 +namespace __ucrt { +template + requires is_integral_v<_A1> +[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) noexcept { + return true; +} + +template + requires is_integral_v<_A1> +[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) noexcept { + return false; +} + +template + requires is_integral_v<_A1> +[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) noexcept { + return false; +} + +template + requires is_integral_v<_A1> +[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) noexcept { + return __x != 0; +} + +template + requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2> +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) noexcept { + using type = __promote_t<_A1, _A2>; + return __builtin_isgreater((type)__x, (type)__y); +} + +template + requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2> +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) noexcept { + using type = __promote_t<_A1, _A2>; + return __builtin_isgreaterequal((type)__x, (type)__y); +} + +template + requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2> +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) noexcept { + using type = __promote_t<_A1, _A2>; + return __builtin_isless((type)__x, (type)__y); +} + +template + requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2> +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) noexcept { + using type = __promote_t<_A1, _A2>; + return __builtin_islessequal((type)__x, (type)__y); +} + +template + requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2> +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 
__y) noexcept { + using type = __promote_t<_A1, _A2>; + return __builtin_islessgreater((type)__x, (type)__y); +} + +template + requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2> +[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) noexcept { + using type = __promote_t<_A1, _A2>; + return __builtin_isunordered((type)__x, (type)__y); +} +} // namespace __ucrt +#endif + } // namespace __math _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__mdspan/extents.h b/lib/libcxx/include/__mdspan/extents.h index 99b54badf8..d16bbd2af4 100644 --- a/lib/libcxx/include/__mdspan/extents.h +++ b/lib/libcxx/include/__mdspan/extents.h @@ -25,6 +25,7 @@ #include <__type_traits/integer_traits.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_signed.h> #include <__type_traits/make_unsigned.h> #include <__utility/integer_sequence.h> #include <__utility/unreachable.h> @@ -298,11 +299,13 @@ private: public: // [mdspan.extents.obs], observers of multidimensional index space - _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return __rank_; } - _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return __rank_dynamic_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return __rank_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return __rank_dynamic_; } - _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept { return __vals_.__value(__r); } - _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept { + return __vals_.__value(__r); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept { return _Values::__static_value(__r); } diff --git 
a/lib/libcxx/include/__mdspan/layout_stride.h b/lib/libcxx/include/__mdspan/layout_stride.h index 9d77d71bc3..eb22475756 100644 --- a/lib/libcxx/include/__mdspan/layout_stride.h +++ b/lib/libcxx/include/__mdspan/layout_stride.h @@ -272,11 +272,10 @@ public: return [&](index_sequence<_Pos...>) { if ((__extents_.extent(_Pos) * ... * 1) == 0) return static_cast(0); - else - return static_cast( - static_cast(1) + - (((__extents_.extent(_Pos) - static_cast(1)) * __strides_[_Pos]) + ... + - static_cast(0))); + + return static_cast( + static_cast(1) + (((__extents_.extent(_Pos) - static_cast(1)) * __strides_[_Pos]) + + ... + static_cast(0))); }(make_index_sequence<__rank_>()); } } diff --git a/lib/libcxx/include/__mdspan/mdspan.h b/lib/libcxx/include/__mdspan/mdspan.h index c0f2767819..449baea43f 100644 --- a/lib/libcxx/include/__mdspan/mdspan.h +++ b/lib/libcxx/include/__mdspan/mdspan.h @@ -87,16 +87,17 @@ public: using data_handle_type = typename accessor_type::data_handle_type; using reference = typename accessor_type::reference; - _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return extents_type::rank(); } - _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return extents_type::rank_dynamic(); } - _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return extents_type::rank(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { + return extents_type::rank_dynamic(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept { return extents_type::static_extent(__r); } - _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept { return __map_.extents().extent(__r); - }; + } -public: 
//-------------------------------------------------------------------------------- // [mdspan.mdspan.cons], mdspan constructors, assignment, and destructor @@ -185,7 +186,7 @@ public: requires((is_convertible_v<_OtherIndexTypes, index_type> && ...) && (is_nothrow_constructible_v && ...) && (sizeof...(_OtherIndexTypes) == rank())) - _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](_OtherIndexTypes... __indices) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](_OtherIndexTypes... __indices) const { // Note the standard layouts would also check this, but user provided ones may not, so we // check the precondition here _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__mdspan_detail::__is_multidimensional_index_in(extents(), __indices...), @@ -196,7 +197,8 @@ public: template requires(is_convertible_v && is_nothrow_constructible_v) - _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](const array< _OtherIndexType, rank()>& __indices) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference + operator[](const array< _OtherIndexType, rank()>& __indices) const { return __acc_.access(__ptr_, [&](index_sequence<_Idxs...>) { return __map_(__indices[_Idxs]...); }(make_index_sequence())); @@ -205,13 +207,13 @@ public: template requires(is_convertible_v && is_nothrow_constructible_v) - _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](span<_OtherIndexType, rank()> __indices) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](span<_OtherIndexType, rank()> __indices) const { return __acc_.access(__ptr_, [&](index_sequence<_Idxs...>) { return __map_(__indices[_Idxs]...); }(make_index_sequence())); } - _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { // Could leave this as only checked in debug mode: semantically size() is never // guaranteed to be related to any accessible range _LIBCPP_ASSERT_UNCATEGORIZED( @@ -237,24 +239,28 
@@ public: swap(__x.__acc_, __y.__acc_); } - _LIBCPP_HIDE_FROM_ABI constexpr const extents_type& extents() const noexcept { return __map_.extents(); }; - _LIBCPP_HIDE_FROM_ABI constexpr const data_handle_type& data_handle() const noexcept { return __ptr_; }; - _LIBCPP_HIDE_FROM_ABI constexpr const mapping_type& mapping() const noexcept { return __map_; }; - _LIBCPP_HIDE_FROM_ABI constexpr const accessor_type& accessor() const noexcept { return __acc_; }; + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const extents_type& extents() const noexcept { + return __map_.extents(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const data_handle_type& data_handle() const noexcept { return __ptr_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const mapping_type& mapping() const noexcept { return __map_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const accessor_type& accessor() const noexcept { return __acc_; } // per LWG-4021 "mdspan::is_always_meow() should be noexcept" - _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_unique() noexcept { return mapping_type::is_always_unique(); }; - _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_exhaustive() noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_unique() noexcept { + return mapping_type::is_always_unique(); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_exhaustive() noexcept { return mapping_type::is_always_exhaustive(); - }; - _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_strided() noexcept { + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_strided() noexcept { return mapping_type::is_always_strided(); - }; + } - _LIBCPP_HIDE_FROM_ABI constexpr bool is_unique() const { return __map_.is_unique(); }; - _LIBCPP_HIDE_FROM_ABI constexpr bool is_exhaustive() const { return __map_.is_exhaustive(); }; - _LIBCPP_HIDE_FROM_ABI constexpr bool is_strided() const { return __map_.is_strided(); }; - _LIBCPP_HIDE_FROM_ABI 
constexpr index_type stride(rank_type __r) const { return __map_.stride(__r); }; + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool is_unique() const { return __map_.is_unique(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool is_exhaustive() const { return __map_.is_exhaustive(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool is_strided() const { return __map_.is_strided(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr index_type stride(rank_type __r) const { return __map_.stride(__r); } private: _LIBCPP_NO_UNIQUE_ADDRESS data_handle_type __ptr_{}; diff --git a/lib/libcxx/include/__memory/addressof.h b/lib/libcxx/include/__memory/addressof.h index 667071dfc6..52ec94a529 100644 --- a/lib/libcxx/include/__memory/addressof.h +++ b/lib/libcxx/include/__memory/addressof.h @@ -19,7 +19,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* addressof(_Tp& __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* +addressof(_Tp& __x) _NOEXCEPT { return __builtin_addressof(__x); } @@ -27,24 +28,25 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* a // Objective-C++ Automatic Reference Counting uses qualified pointers // that require special addressof() signatures. 
template -inline _LIBCPP_HIDE_FROM_ABI __strong _Tp* addressof(__strong _Tp& __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __strong _Tp* addressof(__strong _Tp& __x) _NOEXCEPT { return &__x; } # if __has_feature(objc_arc_weak) template -inline _LIBCPP_HIDE_FROM_ABI __weak _Tp* addressof(__weak _Tp& __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __weak _Tp* addressof(__weak _Tp& __x) _NOEXCEPT { return &__x; } # endif template -inline _LIBCPP_HIDE_FROM_ABI __autoreleasing _Tp* addressof(__autoreleasing _Tp& __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __autoreleasing _Tp* addressof(__autoreleasing _Tp& __x) _NOEXCEPT { return &__x; } template -inline _LIBCPP_HIDE_FROM_ABI __unsafe_unretained _Tp* addressof(__unsafe_unretained _Tp& __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __unsafe_unretained _Tp* +addressof(__unsafe_unretained _Tp& __x) _NOEXCEPT { return &__x; } #endif diff --git a/lib/libcxx/include/__memory/align.h b/lib/libcxx/include/__memory/align.h index 402eac3380..47a7a2d274 100644 --- a/lib/libcxx/include/__memory/align.h +++ b/lib/libcxx/include/__memory/align.h @@ -11,6 +11,7 @@ #include <__config> #include <__cstddef/size_t.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -18,7 +19,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_EXPORTED_FROM_ABI void* align(size_t __align, size_t __sz, void*& __ptr, size_t& __space); +inline namespace __align_inline { +_LIBCPP_HIDE_FROM_ABI inline void* align(size_t __align, size_t __sz, void*& __ptr, size_t& __space) { + void* __r = nullptr; + if (__sz <= __space) { + char* __p1 = static_cast(__ptr); + char* __p2 = reinterpret_cast(reinterpret_cast(__p1 + (__align - 1)) & -__align); + size_t __d = static_cast(__p2 - __p1); + if (__d <= __space - __sz) { + __r = __p2; + __ptr = __r; + __space -= __d; + } + } + return __r; +} + +} // namespace __align_inline _LIBCPP_END_NAMESPACE_STD diff --git 
a/lib/libcxx/include/__memory/allocate_at_least.h b/lib/libcxx/include/__memory/allocate_at_least.h index 9b5a8bcbd4..72140d0de2 100644 --- a/lib/libcxx/include/__memory/allocate_at_least.h +++ b/lib/libcxx/include/__memory/allocate_at_least.h @@ -19,26 +19,31 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template +struct __allocation_result { + _Pointer ptr; + _SizeT count; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __allocation_result(_Pointer __ptr, _SizeT __count) + : ptr(__ptr), count(__count) {} +}; +_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(__allocation_result); + #if _LIBCPP_STD_VER >= 23 template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto __allocate_at_least(_Alloc& __alloc, size_t __n) { - return std::allocator_traits<_Alloc>::allocate_at_least(__alloc, __n); + auto __res = std::allocator_traits<_Alloc>::allocate_at_least(__alloc, __n); + return __allocation_result{__res.ptr, __res.count}; } #else -template -struct __allocation_result { - _Pointer ptr; - size_t count; -}; - -template +template > [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI -_LIBCPP_CONSTEXPR __allocation_result::pointer> +_LIBCPP_CONSTEXPR __allocation_result __allocate_at_least(_Alloc& __alloc, size_t __n) { - return {__alloc.allocate(__n), __n}; + return __allocation_result(__alloc.allocate(__n), __n); } #endif // _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__memory/allocator.h b/lib/libcxx/include/__memory/allocator.h index 52f4122a9b..609b305a12 100644 --- a/lib/libcxx/include/__memory/allocator.h +++ b/lib/libcxx/include/__memory/allocator.h @@ -14,7 +14,6 @@ #include <__cstddef/ptrdiff_t.h> #include <__cstddef/size_t.h> #include <__memory/addressof.h> -#include <__memory/allocate_at_least.h> #include <__memory/allocator_traits.h> #include <__new/allocate.h> #include <__new/exceptions.h> @@ -51,33 +50,21 @@ public: }; #endif // _LIBCPP_STD_VER <= 17 -// This class provides a non-trivial default constructor to the class that derives from it -// if the condition is satisfied. 
-// -// The second template parameter exists to allow giving a unique type to __non_trivial_if, -// which makes it possible to avoid breaking the ABI when making this a base class of an -// existing class. Without that, imagine we have classes D1 and D2, both of which used to -// have no base classes, but which now derive from __non_trivial_if. The layout of a class -// that inherits from both D1 and D2 will change because the two __non_trivial_if base -// classes are not allowed to share the same address. -// -// By making those __non_trivial_if base classes unique, we work around this problem and -// it is safe to start deriving from __non_trivial_if in existing classes. -template -struct __non_trivial_if {}; +template +struct __non_trivially_default_constructible_if {}; template -struct __non_trivial_if { - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __non_trivial_if() _NOEXCEPT {} +struct __non_trivially_default_constructible_if { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __non_trivially_default_constructible_if() {} }; -// allocator -// -// Note: For ABI compatibility between C++20 and previous standards, we make -// allocator trivial in C++20. 
- template -class allocator : private __non_trivial_if::value, allocator<_Tp> > { +class allocator +// TODO(LLVM 24): Remove the opt-out +#ifdef _LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR + : __non_trivially_default_constructible_if::value, allocator<_Tp> > +#endif +{ static_assert(!is_const<_Tp>::value, "std::allocator does not support const types"); static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types"); @@ -133,10 +120,11 @@ public: typedef allocator<_Up> other; }; - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI pointer address(reference __x) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI pointer address(reference __x) const _NOEXCEPT { return std::addressof(__x); } - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI const_pointer address(const_reference __x) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI const_pointer + address(const_reference __x) const _NOEXCEPT { return std::addressof(__x); } @@ -144,7 +132,7 @@ public: return allocate(__n); } - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return size_type(~0) / sizeof(_Tp); } diff --git a/lib/libcxx/include/__memory/allocator_traits.h b/lib/libcxx/include/__memory/allocator_traits.h index 46c247f704..b38d7293a3 100644 --- a/lib/libcxx/include/__memory/allocator_traits.h +++ b/lib/libcxx/include/__memory/allocator_traits.h @@ -314,23 +314,25 @@ struct allocator_traits { } template , int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type max_size(const allocator_type& __a) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type + max_size(const allocator_type& __a) _NOEXCEPT { _LIBCPP_SUPPRESS_DEPRECATED_PUSH return __a.max_size(); _LIBCPP_SUPPRESS_DEPRECATED_POP } 
template , int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type max_size(const allocator_type&) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type + max_size(const allocator_type&) _NOEXCEPT { return numeric_limits::max() / sizeof(value_type); } template , int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type select_on_container_copy_construction(const allocator_type& __a) { return __a.select_on_container_copy_construction(); } template , int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type select_on_container_copy_construction(const allocator_type& __a) { return __a; } diff --git a/lib/libcxx/include/__memory/compressed_pair.h b/lib/libcxx/include/__memory/compressed_pair.h index 29e503931b..f1f1c92045 100644 --- a/lib/libcxx/include/__memory/compressed_pair.h +++ b/lib/libcxx/include/__memory/compressed_pair.h @@ -28,8 +28,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD // understand how it works). // // ================================================================================================================== // -// The first member is aligned to the alignment of the second member to force padding in front of the compressed pair -// in case there are members before it. +// On GCC, the first member is aligned to the alignment of the second member to force padding in front of the compressed +// pair in case there are members before it. // // For example: // (assuming x86-64 linux) @@ -52,7 +52,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD // // Furthermore, that alignment must be the same as what was used in the old __compressed_pair layout, so we must // handle reference types specially since alignof(T&) == alignof(T). 
-// See https://github.com/llvm/llvm-project/issues/118559. +// See https://llvm.org/PR118559. +// +// On Clang, this is unnecessary, since we use anonymous structs instead, which automatically handle the alignment +// correctly. #ifndef _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING @@ -64,7 +67,7 @@ inline const size_t __compressed_pair_alignment<_Tp&> = _LIBCPP_ALIGNOF(void*); template inline const bool __is_reference_or_unpadded_object = - (is_empty<_ToPad>::value && !__libcpp_is_final<_ToPad>::value) || sizeof(_ToPad) == __datasizeof_v<_ToPad>; + (is_empty<_ToPad>::value && !__is_final_v<_ToPad>) || sizeof(_ToPad) == __datasizeof_v<_ToPad>; template inline const bool __is_reference_or_unpadded_object<_Tp&> = true; @@ -80,6 +83,10 @@ class __compressed_pair_padding { template class __compressed_pair_padding<_ToPad, true> {}; +# define _LIBCPP_COMPRESSED_ELEMENT(T1, Initializer1) \ + _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1; \ + _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding_, __LINE__, _) + // TODO: Fix the ABI for GCC as well once https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121637 is fixed # ifdef _LIBCPP_COMPILER_GCC # define _LIBCPP_COMPRESSED_PAIR(T1, Initializer1, T2, Initializer2) \ @@ -100,8 +107,7 @@ class __compressed_pair_padding<_ToPad, true> {}; # else # define _LIBCPP_COMPRESSED_PAIR(T1, Initializer1, T2, Initializer2) \ struct { \ - _LIBCPP_NO_UNIQUE_ADDRESS \ - __attribute__((__aligned__(::std::__compressed_pair_alignment))) T1 Initializer1; \ + _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1; \ _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding1_, __LINE__, _); \ _LIBCPP_NO_UNIQUE_ADDRESS T2 Initializer2; \ _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding2_, __LINE__, _); \ @@ -109,9 +115,7 @@ class __compressed_pair_padding<_ToPad, true> {}; # define _LIBCPP_COMPRESSED_TRIPLE(T1, Initializer1, T2, Initializer2, T3, Initializer3) \ struct { \ - 
_LIBCPP_NO_UNIQUE_ADDRESS \ - __attribute__((__aligned__(::std::__compressed_pair_alignment), \ - __aligned__(::std::__compressed_pair_alignment))) T1 Initializer1; \ + _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1; \ _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding1_, __LINE__, _); \ _LIBCPP_NO_UNIQUE_ADDRESS T2 Initializer2; \ _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding2_, __LINE__, _); \ @@ -121,6 +125,8 @@ class __compressed_pair_padding<_ToPad, true> {}; # endif #else +# define _LIBCPP_COMPRESSED_ELEMENT(T1, Initializer1) _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1 + # define _LIBCPP_COMPRESSED_PAIR(T1, Name1, T2, Name2) \ _LIBCPP_NO_UNIQUE_ADDRESS T1 Name1; \ _LIBCPP_NO_UNIQUE_ADDRESS T2 Name2 diff --git a/lib/libcxx/include/__memory/construct_at.h b/lib/libcxx/include/__memory/construct_at.h index b64e64b5a2..5378c03aba 100644 --- a/lib/libcxx/include/__memory/construct_at.h +++ b/lib/libcxx/include/__memory/construct_at.h @@ -14,7 +14,6 @@ #include <__config> #include <__memory/addressof.h> #include <__new/placement_new_delete.h> -#include <__type_traits/enable_if.h> #include <__type_traits/is_array.h> #include <__utility/declval.h> #include <__utility/forward.h> @@ -33,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template ()) _Tp(std::declval<_Args>()...))> -_LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __args) { +_LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* _LIBCPP_DIAGNOSE_NULLPTR __location, _Args&&... __args) { _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"); return ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); } @@ -55,35 +54,25 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* __construct_at(_Tp* __l // The internal functions are available regardless of the language version (with the exception of the `__destroy_at` // taking an array). 
-template ::value, int> = 0> +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __destroy_at(_Tp* __loc) { _LIBCPP_ASSERT_NON_NULL(__loc != nullptr, "null pointer given to destroy_at"); - __loc->~_Tp(); -} - #if _LIBCPP_STD_VER >= 20 -template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI constexpr void __destroy_at(_Tp* __loc) { - _LIBCPP_ASSERT_NON_NULL(__loc != nullptr, "null pointer given to destroy_at"); - for (auto&& __val : *__loc) - std::__destroy_at(std::addressof(__val)); -} + if constexpr (is_array_v<_Tp>) { + for (auto&& __val : *__loc) + std::__destroy_at(std::addressof(__val)); + } else #endif + { + __loc->~_Tp(); + } +} #if _LIBCPP_STD_VER >= 17 - -template , int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void destroy_at(_Tp* __loc) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void destroy_at(_Tp* _LIBCPP_DIAGNOSE_NULLPTR __loc) { std::__destroy_at(__loc); } - -# if _LIBCPP_STD_VER >= 20 -template , int> = 0> -_LIBCPP_HIDE_FROM_ABI constexpr void destroy_at(_Tp* __loc) { - std::__destroy_at(__loc); -} -# endif - #endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__memory/inout_ptr.h b/lib/libcxx/include/__memory/inout_ptr.h index ef345fe469..0fa685afb2 100644 --- a/lib/libcxx/include/__memory/inout_ptr.h +++ b/lib/libcxx/include/__memory/inout_ptr.h @@ -96,7 +96,7 @@ private: }; template -_LIBCPP_HIDE_FROM_ABI auto inout_ptr(_Smart& __s, _Args&&... __args) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto inout_ptr(_Smart& __s, _Args&&... 
__args) { using _Ptr = conditional_t, __pointer_of_t<_Smart>, _Pointer>; return std::inout_ptr_t<_Smart, _Ptr, _Args&&...>(__s, std::forward<_Args>(__args)...); } diff --git a/lib/libcxx/include/__memory/is_sufficiently_aligned.h b/lib/libcxx/include/__memory/is_sufficiently_aligned.h index 4280920cab..93d24aaf78 100644 --- a/lib/libcxx/include/__memory/is_sufficiently_aligned.h +++ b/lib/libcxx/include/__memory/is_sufficiently_aligned.h @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 26 template -_LIBCPP_HIDE_FROM_ABI bool is_sufficiently_aligned(_Tp* __ptr) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_sufficiently_aligned(_Tp* __ptr) { return reinterpret_cast(__ptr) % _Alignment == 0; } diff --git a/lib/libcxx/include/__memory/out_ptr.h b/lib/libcxx/include/__memory/out_ptr.h index e498e3307b..23a77f6a0f 100644 --- a/lib/libcxx/include/__memory/out_ptr.h +++ b/lib/libcxx/include/__memory/out_ptr.h @@ -88,7 +88,7 @@ private: }; template -_LIBCPP_HIDE_FROM_ABI auto out_ptr(_Smart& __s, _Args&&... __args) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto out_ptr(_Smart& __s, _Args&&... 
__args) { using _Ptr = conditional_t, __pointer_of_t<_Smart>, _Pointer>; return std::out_ptr_t<_Smart, _Ptr, _Args&&...>(__s, std::forward<_Args>(__args)...); } diff --git a/lib/libcxx/include/__memory/pointer_traits.h b/lib/libcxx/include/__memory/pointer_traits.h index 8c7f8dff1b..62fcd93263 100644 --- a/lib/libcxx/include/__memory/pointer_traits.h +++ b/lib/libcxx/include/__memory/pointer_traits.h @@ -255,7 +255,7 @@ concept __resettable_smart_pointer_with_args = requires(_Smart __s, _Pointer __p // This function ensures safe conversions between fancy pointers at compile-time, where we avoid casts from/to // `__void_pointer` by obtaining the underlying raw pointer from the fancy pointer using `std::to_address`, // then dereferencing it to retrieve the pointed-to object, and finally constructing the target fancy pointer -// to that object using the `std::pointer_traits<>::pinter_to` function. +// to that object using the `std::pointer_traits<>::pointer_to` function. template _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI _PtrTo __static_fancy_pointer_cast(const _PtrFrom& __p) { using __ptr_traits = pointer_traits<_PtrTo>; diff --git a/lib/libcxx/include/__memory/raw_storage_iterator.h b/lib/libcxx/include/__memory/raw_storage_iterator.h index 0e8b909070..dff0fed3b7 100644 --- a/lib/libcxx/include/__memory/raw_storage_iterator.h +++ b/lib/libcxx/include/__memory/raw_storage_iterator.h @@ -28,15 +28,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR) -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template class _LIBCPP_DEPRECATED_IN_CXX17 raw_storage_iterator -# if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -# endif -{ - _LIBCPP_SUPPRESS_DEPRECATED_POP - + : public __iterator_base, output_iterator_tag, void, void, void, void> { private: _OutputIterator __x_; @@ -52,7 +46,7 @@ public: typedef void reference; _LIBCPP_HIDE_FROM_ABI explicit 
raw_storage_iterator(_OutputIterator __x) : __x_(__x) {} - _LIBCPP_HIDE_FROM_ABI raw_storage_iterator& operator*() { return *this; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI raw_storage_iterator& operator*() { return *this; } _LIBCPP_HIDE_FROM_ABI raw_storage_iterator& operator=(const _Tp& __element) { ::new ((void*)std::addressof(*__x_)) _Tp(__element); return *this; @@ -73,7 +67,7 @@ public: return __t; } # if _LIBCPP_STD_VER >= 14 - _LIBCPP_HIDE_FROM_ABI _OutputIterator base() const { return __x_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _OutputIterator base() const { return __x_; } # endif }; diff --git a/lib/libcxx/include/__memory/shared_count.h b/lib/libcxx/include/__memory/shared_count.h index dad20bcabd..b40d8c9cf7 100644 --- a/lib/libcxx/include/__memory/shared_count.h +++ b/lib/libcxx/include/__memory/shared_count.h @@ -22,37 +22,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) // should be sufficient for thread safety. 
// See https://llvm.org/PR22803 -#if (defined(__clang__) && __has_builtin(__atomic_add_fetch) && defined(__ATOMIC_RELAXED) && \ - defined(__ATOMIC_ACQ_REL)) || \ - defined(_LIBCPP_COMPILER_GCC) -# define _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT 1 -#else -# define _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT 0 -#endif - -template -inline _LIBCPP_HIDE_FROM_ABI _ValueType __libcpp_relaxed_load(_ValueType const* __value) { -#if _LIBCPP_HAS_THREADS && defined(__ATOMIC_RELAXED) && \ - (__has_builtin(__atomic_load_n) || defined(_LIBCPP_COMPILER_GCC)) - return __atomic_load_n(__value, __ATOMIC_RELAXED); -#else - return *__value; -#endif -} - -template -inline _LIBCPP_HIDE_FROM_ABI _ValueType __libcpp_acquire_load(_ValueType const* __value) { -#if _LIBCPP_HAS_THREADS && defined(__ATOMIC_ACQUIRE) && \ - (__has_builtin(__atomic_load_n) || defined(_LIBCPP_COMPILER_GCC)) - return __atomic_load_n(__value, __ATOMIC_ACQUIRE); -#else - return *__value; -#endif -} template inline _LIBCPP_HIDE_FROM_ABI _Tp __libcpp_atomic_refcount_increment(_Tp& __t) _NOEXCEPT { -#if _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT && _LIBCPP_HAS_THREADS +#if _LIBCPP_HAS_THREADS return __atomic_add_fetch(std::addressof(__t), 1, __ATOMIC_RELAXED); #else return __t += 1; @@ -61,7 +34,7 @@ inline _LIBCPP_HIDE_FROM_ABI _Tp __libcpp_atomic_refcount_increment(_Tp& __t) _N template inline _LIBCPP_HIDE_FROM_ABI _Tp __libcpp_atomic_refcount_decrement(_Tp& __t) _NOEXCEPT { -#if _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT && _LIBCPP_HAS_THREADS +#if _LIBCPP_HAS_THREADS return __atomic_add_fetch(std::addressof(__t), -1, __ATOMIC_ACQ_REL); #else return __t -= 1; @@ -95,7 +68,13 @@ public: return false; } #endif - _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __libcpp_relaxed_load(&__shared_owners_) + 1; } + _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { +#if _LIBCPP_HAS_THREADS + return __atomic_load_n(&__shared_owners_, __ATOMIC_RELAXED) + 1; +#else + return __shared_owners_ + 1; +#endif + } }; class 
_LIBCPP_EXPORTED_FROM_ABI __shared_weak_count : private __shared_count { diff --git a/lib/libcxx/include/__memory/shared_ptr.h b/lib/libcxx/include/__memory/shared_ptr.h index 0cbd995105..4fbd0af984 100644 --- a/lib/libcxx/include/__memory/shared_ptr.h +++ b/lib/libcxx/include/__memory/shared_ptr.h @@ -41,19 +41,18 @@ #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> #include <__type_traits/is_array.h> -#include <__type_traits/is_bounded_array.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_function.h> #include <__type_traits/is_reference.h> #include <__type_traits/is_same.h> -#include <__type_traits/is_unbounded_array.h> #include <__type_traits/nat.h> #include <__type_traits/negation.h> #include <__type_traits/remove_cv.h> #include <__type_traits/remove_extent.h> #include <__type_traits/remove_reference.h> #include <__utility/declval.h> +#include <__utility/exception_guard.h> #include <__utility/forward.h> #include <__utility/move.h> #include <__utility/swap.h> @@ -78,7 +77,7 @@ public: _LIBCPP_HIDE_FROM_ABI bad_weak_ptr(const bad_weak_ptr&) _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI bad_weak_ptr& operator=(const bad_weak_ptr&) _NOEXCEPT = default; ~bad_weak_ptr() _NOEXCEPT override; - const char* what() const _NOEXCEPT override; + [[__nodiscard__]] const char* what() const _NOEXCEPT override; }; [[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_bad_weak_ptr() { @@ -316,10 +315,8 @@ public: #endif // A shared_ptr contains only two raw pointers which point to the heap and move constructing already doesn't require - // any bookkeeping, so it's always trivially relocatable. It is also replaceable because assignment just rebinds the - // shared_ptr to manage a different object. + // any bookkeeping, so it's always trivially relocatable. 
using __trivially_relocatable _LIBCPP_NODEBUG = shared_ptr; - using __replaceable _LIBCPP_NODEBUG = shared_ptr; private: element_type* __ptr_; @@ -352,23 +349,16 @@ public: template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI shared_ptr(_Yp* __p, _Dp __d) : __ptr_(__p) { -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT> _CntrlBlk; + auto __guard = std::__make_exception_guard([&] { __d(__p); }); + typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; + typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT> _CntrlBlk; #ifndef _LIBCPP_CXX03_LANG - __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT()); + __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT()); #else __cntrl_ = new _CntrlBlk(__p, __d, _AllocT()); #endif // not _LIBCPP_CXX03_LANG - __enable_weak_this(__p, __p); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - __d(__p); - throw; - } -#endif // _LIBCPP_HAS_EXCEPTIONS + __enable_weak_this(__p, __p); + __guard.__complete(); } template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI shared_ptr(_Yp* __p, _Dp __d, _Alloc __a) : __ptr_(__p) { -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk; - typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; - typedef __allocator_destructor<_A2> _D2; - _A2 __a2(__a); - unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); - ::new ((void*)std::addressof(*__hold2.get())) + auto __guard = std::__make_exception_guard([&] { __d(__p); }); + typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk; + typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; + typedef __allocator_destructor<_A2> _D2; + _A2 __a2(__a); + unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); + ::new ((void*)std::addressof(*__hold2.get())) #ifndef _LIBCPP_CXX03_LANG - 
_CntrlBlk(__p, std::move(__d), __a); + _CntrlBlk(__p, std::move(__d), __a); #else _CntrlBlk(__p, __d, __a); #endif // not _LIBCPP_CXX03_LANG - __cntrl_ = std::addressof(*__hold2.release()); - __enable_weak_this(__p, __p); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - __d(__p); - throw; - } -#endif // _LIBCPP_HAS_EXCEPTIONS + __cntrl_ = std::addressof(*__hold2.release()); + __enable_weak_this(__p, __p); + __guard.__complete(); } template @@ -406,22 +389,15 @@ public: _Dp __d, __enable_if_t<__shared_ptr_nullptr_deleter_ctor_reqs<_Dp>::value, __nullptr_sfinae_tag> = __nullptr_sfinae_tag()) : __ptr_(nullptr) { -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT; - typedef __shared_ptr_pointer _CntrlBlk; + auto __guard = std::__make_exception_guard([&] { __d(__p); }); + typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT; + typedef __shared_ptr_pointer _CntrlBlk; #ifndef _LIBCPP_CXX03_LANG - __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT()); + __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT()); #else __cntrl_ = new _CntrlBlk(__p, __d, _AllocT()); #endif // not _LIBCPP_CXX03_LANG -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - __d(__p); - throw; - } -#endif // _LIBCPP_HAS_EXCEPTIONS + __guard.__complete(); } template @@ -431,27 +407,20 @@ public: _Alloc __a, __enable_if_t<__shared_ptr_nullptr_deleter_ctor_reqs<_Dp>::value, __nullptr_sfinae_tag> = __nullptr_sfinae_tag()) : __ptr_(nullptr) { -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif // _LIBCPP_HAS_EXCEPTIONS - typedef __shared_ptr_pointer _CntrlBlk; - typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; - typedef __allocator_destructor<_A2> _D2; - _A2 __a2(__a); - unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); - ::new ((void*)std::addressof(*__hold2.get())) + auto __guard = std::__make_exception_guard([&] { __d(__p); }); + typedef __shared_ptr_pointer _CntrlBlk; + typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; + typedef __allocator_destructor<_A2> _D2; + _A2 __a2(__a); + unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); + ::new ((void*)std::addressof(*__hold2.get())) #ifndef _LIBCPP_CXX03_LANG - _CntrlBlk(__p, std::move(__d), __a); + _CntrlBlk(__p, std::move(__d), __a); #else _CntrlBlk(__p, __d, __a); #endif // not _LIBCPP_CXX03_LANG - __cntrl_ = std::addressof(*__hold2.release()); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - __d(__p); - throw; - } -#endif // _LIBCPP_HAS_EXCEPTIONS + __cntrl_ = std::addressof(*__hold2.release()); + __guard.__complete(); } template @@ -514,45 +483,16 @@ public: template ::value && __compatible_with<_Yp, _Tp>::value && + __enable_if_t<__compatible_with<_Yp, _Tp>::value && is_convertible::pointer, element_type*>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI shared_ptr(unique_ptr<_Yp, _Dp>&& __r) : __ptr_(__r.get()) { -#if _LIBCPP_STD_VER >= 14 - if (__ptr_ == nullptr) - __cntrl_ = nullptr; - else -#endif - { - typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer::pointer, _Dp, _AllocT> _CntrlBlk; - __cntrl_ = new _CntrlBlk(__r.get(), std::move(__r.get_deleter()), _AllocT()); - __enable_weak_this(__r.get(), __r.get()); - } - __r.release(); - } + using _AllocT = typename __shared_ptr_default_allocator<_Yp>::type; + using _Deleter = _If::value, reference_wrapper<__libcpp_remove_reference_t<_Dp> >, _Dp>; + using _CntrlBlk = __shared_ptr_pointer::pointer, _Deleter, _AllocT>; - template ::value && __compatible_with<_Yp, _Tp>::value && - is_convertible::pointer, element_type*>::value, - int> = 0> - _LIBCPP_HIDE_FROM_ABI shared_ptr(unique_ptr<_Yp, _Dp>&& __r) : __ptr_(__r.get()) { -#if _LIBCPP_STD_VER >= 14 - if (__ptr_ == nullptr) - __cntrl_ = nullptr; - else -#endif - { - typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer::pointer, - reference_wrapper<__libcpp_remove_reference_t<_Dp> >, - _AllocT> - _CntrlBlk; - __cntrl_ = new _CntrlBlk(__r.get(), std::ref(__r.get_deleter()), _AllocT()); - __enable_weak_this(__r.get(), __r.get()); - } + __cntrl_ = __ptr_ ? 
new _CntrlBlk(__r.get(), std::forward<_Dp>(__r.get_deleter()), _AllocT()) : nullptr; + __enable_weak_this(__r.get(), __r.get()); __r.release(); } @@ -628,37 +568,43 @@ public: shared_ptr(__p, __d, __a).swap(*this); } - _LIBCPP_HIDE_FROM_ABI element_type* get() const _NOEXCEPT { return __ptr_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI element_type* get() const _NOEXCEPT { return __ptr_; } - _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t operator*() const _NOEXCEPT { return *__ptr_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t operator*() const _NOEXCEPT { + return *__ptr_; + } _LIBCPP_HIDE_FROM_ABI element_type* operator->() const _NOEXCEPT { static_assert(!is_array<_Tp>::value, "std::shared_ptr::operator-> is only valid when T is not an array type."); return __ptr_; } - _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __cntrl_ ? __cntrl_->use_count() : 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { + return __cntrl_ ? 
__cntrl_->use_count() : 0; + } #if _LIBCPP_STD_VER < 20 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE) - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI bool unique() const _NOEXCEPT { return use_count() == 1; } + [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI bool unique() const _NOEXCEPT { + return use_count() == 1; + } #endif _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return get() != nullptr; } template - _LIBCPP_HIDE_FROM_ABI bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT { return __cntrl_ < __p.__cntrl_; } template - _LIBCPP_HIDE_FROM_ABI bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT { return __cntrl_ < __p.__cntrl_; } _LIBCPP_HIDE_FROM_ABI bool __owner_equivalent(const shared_ptr& __p) const { return __cntrl_ == __p.__cntrl_; } #if _LIBCPP_STD_VER >= 17 - _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t operator[](ptrdiff_t __i) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t operator[](ptrdiff_t __i) const { static_assert(is_array<_Tp>::value, "std::shared_ptr::operator[] is only valid when T is an array type."); return __ptr_[__i]; } @@ -729,7 +675,7 @@ shared_ptr(unique_ptr<_Tp, _Dp>) -> shared_ptr<_Tp>; // std::allocate_shared and std::make_shared // template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&&... __args) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&&... 
__args) { using _ControlBlock = __shared_ptr_emplace<_Tp, _Alloc>; using _ControlBlockAllocator = typename __allocator_traits_rebind<_Alloc, _ControlBlock>::type; __allocation_guard<_ControlBlockAllocator> __guard(__a, 1); @@ -740,21 +686,21 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&& } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(_Args&&... __args) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(_Args&&... __args) { return std::allocate_shared<_Tp>(allocator<__remove_cv_t<_Tp> >(), std::forward<_Args>(__args)...); } #if _LIBCPP_STD_VER >= 20 template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) { using _ForOverwriteAllocator = __allocator_traits_rebind_t<_Alloc, __for_overwrite_tag>; _ForOverwriteAllocator __alloc(__a); return std::allocate_shared<_Tp>(__alloc); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() { return std::allocate_shared_for_overwrite<_Tp>(allocator<__remove_cv_t<_Tp>>()); } @@ -946,67 +892,69 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Array> __allocate_shared_bounded_array(const _ // bounded array variants template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a) { return std::__allocate_shared_bounded_array<_Tp>(__a); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, const remove_extent_t<_Tp>& __u) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> +allocate_shared(const _Alloc& __a, const remove_extent_t<_Tp>& __u) { return std::__allocate_shared_bounded_array<_Tp>(__a, 
__u); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) { using _ForOverwriteAllocator = __allocator_traits_rebind_t<_Alloc, __for_overwrite_tag>; _ForOverwriteAllocator __alloc(__a); return std::__allocate_shared_bounded_array<_Tp>(__alloc); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared() { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared() { return std::__allocate_shared_bounded_array<_Tp>(allocator<_Tp>()); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(const remove_extent_t<_Tp>& __u) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(const remove_extent_t<_Tp>& __u) { return std::__allocate_shared_bounded_array<_Tp>(allocator<_Tp>(), __u); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() { return std::__allocate_shared_bounded_array<_Tp>(allocator<__for_overwrite_tag>()); } // unbounded array variants template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, size_t __n) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, size_t __n) { return std::__allocate_shared_unbounded_array<_Tp>(__a, __n); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, size_t __n, const remove_extent_t<_Tp>& __u) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> +allocate_shared(const _Alloc& __a, size_t __n, const remove_extent_t<_Tp>& __u) { return std::__allocate_shared_unbounded_array<_Tp>(__a, __n, __u); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a, size_t __n) { 
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a, size_t __n) { using _ForOverwriteAllocator = __allocator_traits_rebind_t<_Alloc, __for_overwrite_tag>; _ForOverwriteAllocator __alloc(__a); return std::__allocate_shared_unbounded_array<_Tp>(__alloc, __n); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n) { return std::__allocate_shared_unbounded_array<_Tp>(allocator<_Tp>(), __n); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n, const remove_extent_t<_Tp>& __u) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n, const remove_extent_t<_Tp>& __u) { return std::__allocate_shared_unbounded_array<_Tp>(allocator<_Tp>(), __n, __u); } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite(size_t __n) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite(size_t __n) { return std::__allocate_shared_unbounded_array<_Tp>(allocator<__for_overwrite_tag>(), __n); } @@ -1135,7 +1083,8 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(shared_ptr<_Tp>& __x, shared_ptr<_Tp>& __ } template -inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> +static_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { return shared_ptr<_Tp>(__r, static_cast< typename shared_ptr<_Tp>::element_type*>(__r.get())); } @@ -1143,13 +1092,14 @@ inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(const shared_pt // We don't backport because it is an evolutionary change. 
#if _LIBCPP_STD_VER >= 20 template -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(shared_ptr<_Up>&& __r) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(shared_ptr<_Up>&& __r) noexcept { return shared_ptr<_Tp>(std::move(__r), static_cast::element_type*>(__r.get())); } #endif template -inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { +[[__nodiscard__]] inline + _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { typedef typename shared_ptr<_Tp>::element_type _ET; _ET* __p = dynamic_cast<_ET*>(__r.get()); return __p ? shared_ptr<_Tp>(__r, __p) : shared_ptr<_Tp>(); @@ -1159,14 +1109,14 @@ inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(const shared_p // We don't backport because it is an evolutionary change. #if _LIBCPP_STD_VER >= 20 template -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(shared_ptr<_Up>&& __r) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(shared_ptr<_Up>&& __r) noexcept { auto* __p = dynamic_cast::element_type*>(__r.get()); return __p ? shared_ptr<_Tp>(std::move(__r), __p) : shared_ptr<_Tp>(); } #endif template -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { typedef typename shared_ptr<_Tp>::element_type _RTp; return shared_ptr<_Tp>(__r, const_cast<_RTp*>(__r.get())); } @@ -1175,13 +1125,13 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>& // We don't backport because it is an evolutionary change. 
#if _LIBCPP_STD_VER >= 20 template -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(shared_ptr<_Up>&& __r) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(shared_ptr<_Up>&& __r) noexcept { return shared_ptr<_Tp>(std::move(__r), const_cast::element_type*>(__r.get())); } #endif template -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { return shared_ptr<_Tp>(__r, reinterpret_cast< typename shared_ptr<_Tp>::element_type*>(__r.get())); } @@ -1189,7 +1139,7 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr< // We don't backport because it is an evolutionary change. #if _LIBCPP_STD_VER >= 20 template -_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(shared_ptr<_Up>&& __r) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(shared_ptr<_Up>&& __r) noexcept { return shared_ptr<_Tp>(std::move(__r), reinterpret_cast::element_type*>(__r.get())); } #endif @@ -1197,7 +1147,7 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(shared_ptr<_Up>&& #if _LIBCPP_HAS_RTTI template -inline _LIBCPP_HIDE_FROM_ABI _Dp* get_deleter(const shared_ptr<_Tp>& __p) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _Dp* get_deleter(const shared_ptr<_Tp>& __p) _NOEXCEPT { return __p.template __get_deleter<_Dp>(); } @@ -1213,9 +1163,8 @@ public: #endif // A weak_ptr contains only two raw pointers which point to the heap and move constructing already doesn't require - // any bookkeeping, so it's always trivially relocatable. It's also replaceable for the same reason. + // any bookkeeping, so it's always trivially relocatable. 
using __trivially_relocatable _LIBCPP_NODEBUG = weak_ptr; - using __replaceable _LIBCPP_NODEBUG = weak_ptr; private: element_type* __ptr_; @@ -1253,15 +1202,19 @@ public: _LIBCPP_HIDE_FROM_ABI void swap(weak_ptr& __r) _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI void reset() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __cntrl_ ? __cntrl_->use_count() : 0; } - _LIBCPP_HIDE_FROM_ABI bool expired() const _NOEXCEPT { return __cntrl_ == nullptr || __cntrl_->use_count() == 0; } - _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> lock() const _NOEXCEPT; + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { + return __cntrl_ ? __cntrl_->use_count() : 0; + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool expired() const _NOEXCEPT { + return __cntrl_ == nullptr || __cntrl_->use_count() == 0; + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> lock() const _NOEXCEPT; template - _LIBCPP_HIDE_FROM_ABI bool owner_before(const shared_ptr<_Up>& __r) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(const shared_ptr<_Up>& __r) const _NOEXCEPT { return __cntrl_ < __r.__cntrl_; } template - _LIBCPP_HIDE_FROM_ABI bool owner_before(const weak_ptr<_Up>& __r) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(const weak_ptr<_Up>& __r) const _NOEXCEPT { return __cntrl_ < __r.__cntrl_; } @@ -1445,13 +1398,15 @@ protected: _LIBCPP_HIDE_FROM_ABI ~enable_shared_from_this() {} public: - _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> shared_from_this() { return shared_ptr<_Tp>(__weak_this_); } - _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp const> shared_from_this() const { return shared_ptr(__weak_this_); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> shared_from_this() { return shared_ptr<_Tp>(__weak_this_); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp const> shared_from_this() const { + return shared_ptr(__weak_this_); + } #if _LIBCPP_STD_VER >= 17 - _LIBCPP_HIDE_FROM_ABI weak_ptr<_Tp> 
weak_from_this() _NOEXCEPT { return __weak_this_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI weak_ptr<_Tp> weak_from_this() _NOEXCEPT { return __weak_this_; } - _LIBCPP_HIDE_FROM_ABI weak_ptr weak_from_this() const _NOEXCEPT { return __weak_this_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI weak_ptr weak_from_this() const _NOEXCEPT { return __weak_this_; } #endif // _LIBCPP_STD_VER >= 17 template @@ -1468,7 +1423,7 @@ struct hash > { _LIBCPP_DEPRECATED_IN_CXX17 typedef size_t result_type; #endif - _LIBCPP_HIDE_FROM_ABI size_t operator()(const shared_ptr<_Tp>& __ptr) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const shared_ptr<_Tp>& __ptr) const _NOEXCEPT { return hash::element_type*>()(__ptr.get()); } }; diff --git a/lib/libcxx/include/__memory/temp_value.h b/lib/libcxx/include/__memory/temp_value.h index 4a133b3fbc..5285bcab9a 100644 --- a/lib/libcxx/include/__memory/temp_value.h +++ b/lib/libcxx/include/__memory/temp_value.h @@ -12,7 +12,6 @@ #include <__config> #include <__memory/addressof.h> #include <__memory/allocator_traits.h> -#include <__type_traits/aligned_storage.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -26,7 +25,7 @@ struct __temp_value { typedef allocator_traits<_Alloc> _Traits; #ifdef _LIBCPP_CXX03_LANG - typename aligned_storage::type __v; + _ALIGNAS_TYPE(_Tp) char __v[sizeof(_Tp)]; #else union { _Tp __v; diff --git a/lib/libcxx/include/__memory/uninitialized_algorithms.h b/lib/libcxx/include/__memory/uninitialized_algorithms.h index e802366400..9182db4b41 100644 --- a/lib/libcxx/include/__memory/uninitialized_algorithms.h +++ b/lib/libcxx/include/__memory/uninitialized_algorithms.h @@ -32,7 +32,6 @@ #include <__type_traits/is_trivially_assignable.h> #include <__type_traits/is_trivially_constructible.h> #include <__type_traits/is_trivially_relocatable.h> -#include <__type_traits/is_unbounded_array.h> #include <__type_traits/remove_const.h> #include 
<__type_traits/remove_extent.h> #include <__utility/exception_guard.h> @@ -61,17 +60,10 @@ template __uninitialized_copy( _InputIterator __ifirst, _Sentinel1 __ilast, _ForwardIterator __ofirst, _EndPredicate __stop_copying) { _ForwardIterator __idx = __ofirst; -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif - for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx) - ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - std::__destroy(__ofirst, __idx); - throw; - } -#endif + auto __guard = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); }); + for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx) + ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); + __guard.__complete(); return pair<_InputIterator, _ForwardIterator>(std::move(__ifirst), std::move(__idx)); } @@ -91,17 +83,10 @@ template __uninitialized_copy_n(_InputIterator __ifirst, _Size __n, _ForwardIterator __ofirst, _EndPredicate __stop_copying) { _ForwardIterator __idx = __ofirst; -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif - for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n) - ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - std::__destroy(__ofirst, __idx); - throw; - } -#endif + auto __guard = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); }); + for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n) + ::new (static_cast(std::addressof(*__idx))) _ValueType(*__ifirst); + __guard.__complete(); return pair<_InputIterator, _ForwardIterator>(std::move(__ifirst), std::move(__idx)); } @@ -121,17 +106,10 @@ template inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __x) { _ForwardIterator __idx = __first; -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif - for (; __idx != __last; ++__idx) - ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - std::__destroy(__first, __idx); - throw; - } -#endif + auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); }); + for (; __idx != __last; ++__idx) + ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); + __guard.__complete(); return __idx; } @@ -149,17 +127,10 @@ template inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_fill_n(_ForwardIterator __first, _Size __n, const _Tp& __x) { _ForwardIterator __idx = __first; -#if _LIBCPP_HAS_EXCEPTIONS - try { -#endif - for (; __n > 0; ++__idx, (void)--__n) - ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); -#if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - std::__destroy(__first, __idx); - throw; - } -#endif + auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); }); + for (; __n > 0; ++__idx, (void)--__n) + ::new (static_cast(std::addressof(*__idx))) _ValueType(__x); + __guard.__complete(); return __idx; } @@ -178,18 +149,11 @@ uninitialized_fill_n(_ForwardIterator __first, _Size __n, const _Tp& __x) { template inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_default_construct(_ForwardIterator __first, _Sentinel __last) { - auto __idx = __first; -# if _LIBCPP_HAS_EXCEPTIONS - try { -# endif - for (; __idx != __last; ++__idx) - ::new (static_cast(std::addressof(*__idx))) _ValueType; -# if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - std::__destroy(__first, __idx); - throw; - } -# endif + auto __idx = __first; + auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); }); + for (; __idx != __last; ++__idx) + ::new (static_cast(std::addressof(*__idx))) _ValueType; + __guard.__complete(); return __idx; } @@ -205,17 +169,10 @@ inline _LIBCPP_HIDE_FROM_ABI void uninitialized_default_construct(_ForwardIterat template inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_default_construct_n(_ForwardIterator __first, _Size __n) { auto __idx = __first; -# if _LIBCPP_HAS_EXCEPTIONS - try { -# endif - for (; __n > 0; ++__idx, (void)--__n) - ::new (static_cast(std::addressof(*__idx))) _ValueType; -# if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - std::__destroy(__first, __idx); - throw; - } -# endif + auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); }); + for (; __n > 0; ++__idx, (void)--__n) + ::new (static_cast(std::addressof(*__idx))) _ValueType; + __guard.__complete(); return __idx; } @@ -231,18 +188,11 @@ inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator uninitialized_default_construct_n( template inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_value_construct(_ForwardIterator __first, _Sentinel __last) { - auto __idx = __first; -# if _LIBCPP_HAS_EXCEPTIONS - try { -# endif - for (; __idx != __last; ++__idx) - ::new (static_cast(std::addressof(*__idx))) _ValueType(); -# if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - std::__destroy(__first, __idx); - throw; - } -# endif + auto __idx = __first; + auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); }); + for (; __idx != __last; ++__idx) + ::new (static_cast(std::addressof(*__idx))) _ValueType(); + __guard.__complete(); return __idx; } @@ -258,17 +208,10 @@ inline _LIBCPP_HIDE_FROM_ABI void uninitialized_value_construct(_ForwardIterator template inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_value_construct_n(_ForwardIterator __first, _Size __n) { auto __idx = __first; -# if _LIBCPP_HAS_EXCEPTIONS - try { -# endif - for (; __n > 0; ++__idx, (void)--__n) - ::new (static_cast(std::addressof(*__idx))) _ValueType(); -# if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - std::__destroy(__first, __idx); - throw; - } -# endif + auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); }); + for (; __n > 0; ++__idx, (void)--__n) + ::new (static_cast(std::addressof(*__idx))) _ValueType(); + __guard.__complete(); return __idx; } @@ -293,19 +236,12 @@ inline _LIBCPP_HIDE_FROM_ABI pair<_InputIterator, _ForwardIterator> __uninitiali _ForwardIterator __ofirst, _EndPredicate __stop_moving, _IterMove __iter_move) { - auto __idx = __ofirst; -# if _LIBCPP_HAS_EXCEPTIONS - try { -# endif - for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) { - ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); - } -# if _LIBCPP_HAS_EXCEPTIONS - } catch (...) { - std::__destroy(__ofirst, __idx); - throw; + auto __idx = __ofirst; + auto __guard = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); }); + for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) { + ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); } -# endif + __guard.__complete(); return {std::move(__ifirst), std::move(__idx)}; } @@ -331,18 +267,11 @@ template inline _LIBCPP_HIDE_FROM_ABI pair<_InputIterator, _ForwardIterator> __uninitialized_move_n( _InputIterator __ifirst, _Size __n, _ForwardIterator __ofirst, _EndPredicate __stop_moving, _IterMove __iter_move) { - auto __idx = __ofirst; -# if _LIBCPP_HAS_EXCEPTIONS - try { -# endif - for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n) - ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); -# if _LIBCPP_HAS_EXCEPTIONS - } catch (...) 
{ - std::__destroy(__ofirst, __idx); - throw; - } -# endif + auto __idx = __ofirst; + auto __guard = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); }); + for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n) + ::new (static_cast(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst)); + __guard.__complete(); return {std::move(__ifirst), std::move(__idx)}; } diff --git a/lib/libcxx/include/__memory/unique_ptr.h b/lib/libcxx/include/__memory/unique_ptr.h index eff24546cd..6a4ec0a466 100644 --- a/lib/libcxx/include/__memory/unique_ptr.h +++ b/lib/libcxx/include/__memory/unique_ptr.h @@ -32,18 +32,15 @@ #include <__type_traits/integral_constant.h> #include <__type_traits/is_array.h> #include <__type_traits/is_assignable.h> -#include <__type_traits/is_bounded_array.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_function.h> #include <__type_traits/is_pointer.h> #include <__type_traits/is_reference.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_relocatable.h> -#include <__type_traits/is_unbounded_array.h> #include <__type_traits/is_void.h> #include <__type_traits/remove_extent.h> #include <__type_traits/type_identity.h> @@ -145,8 +142,6 @@ public: __libcpp_is_trivially_relocatable::value && __libcpp_is_trivially_relocatable::value, unique_ptr, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __is_replaceable_v, unique_ptr, void>; private: _LIBCPP_COMPRESSED_PAIR(pointer, __ptr_, deleter_type, __deleter_); @@ -263,14 +258,17 @@ public: return *this; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 __add_lvalue_reference_t<_Tp> operator*() const + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 __add_lvalue_reference_t<_Tp> operator*() 
const _NOEXCEPT_(_NOEXCEPT_(*std::declval())) { return *__ptr_; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 pointer operator->() const _NOEXCEPT { return __ptr_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 pointer get() const _NOEXCEPT { return __ptr_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 deleter_type& get_deleter() _NOEXCEPT { return __deleter_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 const deleter_type& get_deleter() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 pointer get() const _NOEXCEPT { return __ptr_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 deleter_type& get_deleter() _NOEXCEPT { + return __deleter_; + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 const deleter_type& + get_deleter() const _NOEXCEPT { return __deleter_; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 explicit operator bool() const _NOEXCEPT { @@ -413,8 +411,6 @@ public: __libcpp_is_trivially_relocatable::value && __libcpp_is_trivially_relocatable::value, unique_ptr, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __is_replaceable_v, unique_ptr, void>; private: template @@ -755,12 +751,13 @@ operator<=>(const unique_ptr<_T1, _D1>& __x, nullptr_t) { #if _LIBCPP_STD_VER >= 14 template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(_Args&&... __args) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(_Args&&... 
__args) { return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...)); } template , int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(size_t __n) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(size_t __n) { typedef __remove_extent_t<_Tp> _Up; return unique_ptr<_Tp>(__private_constructor_tag(), new _Up[__n](), __n); } @@ -773,12 +770,13 @@ void make_unique(_Args&&...) = delete; #if _LIBCPP_STD_VER >= 20 template , int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique_for_overwrite() { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique_for_overwrite() { return unique_ptr<_Tp>(new _Tp); } template , int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique_for_overwrite(size_t __n) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> +make_unique_for_overwrite(size_t __n) { return unique_ptr<_Tp>(__private_constructor_tag(), new __remove_extent_t<_Tp>[__n], __n); } @@ -802,7 +800,7 @@ struct hash<__enable_hash_helper< unique_ptr<_Tp, _Dp>, typename unique_ptr<_Tp, _LIBCPP_DEPRECATED_IN_CXX17 typedef size_t result_type; #endif - _LIBCPP_HIDE_FROM_ABI size_t operator()(const unique_ptr<_Tp, _Dp>& __ptr) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const unique_ptr<_Tp, _Dp>& __ptr) const { typedef typename unique_ptr<_Tp, _Dp>::pointer pointer; return hash()(__ptr.get()); } diff --git a/lib/libcxx/include/__memory/uses_allocator_construction.h b/lib/libcxx/include/__memory/uses_allocator_construction.h index 49ddf99d9c..6733f5cf6f 100644 --- a/lib/libcxx/include/__memory/uses_allocator_construction.h +++ b/lib/libcxx/include/__memory/uses_allocator_construction.h @@ -17,6 +17,7 @@ #include <__type_traits/remove_cv.h> #include <__utility/declval.h> #include <__utility/pair.h> +#include 
<__utility/piecewise_construct.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__memory_resource/memory_resource.h b/lib/libcxx/include/__memory_resource/memory_resource.h index f93f10fe21..5b42ae5489 100644 --- a/lib/libcxx/include/__memory_resource/memory_resource.h +++ b/lib/libcxx/include/__memory_resource/memory_resource.h @@ -42,7 +42,9 @@ public: do_deallocate(__p, __bytes, __align); } - _LIBCPP_HIDE_FROM_ABI bool is_equal(const memory_resource& __other) const noexcept { return do_is_equal(__other); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_equal(const memory_resource& __other) const noexcept { + return do_is_equal(__other); + } private: virtual void* do_allocate(size_t, size_t) = 0; @@ -68,7 +70,7 @@ operator!=(const memory_resource& __lhs, const memory_resource& __rhs) noexcept // [mem.res.global] -[[__gnu__::__returns_nonnull__]] _LIBCPP_AVAILABILITY_PMR _LIBCPP_EXPORTED_FROM_ABI memory_resource* +[[nodiscard, __gnu__::__returns_nonnull__]] _LIBCPP_AVAILABILITY_PMR _LIBCPP_EXPORTED_FROM_ABI memory_resource* get_default_resource() noexcept; [[__gnu__::__returns_nonnull__]] _LIBCPP_AVAILABILITY_PMR _LIBCPP_EXPORTED_FROM_ABI memory_resource* diff --git a/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h b/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h index 942d490ce3..9c7b07df52 100644 --- a/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h +++ b/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h @@ -93,7 +93,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; } protected: void* do_allocate(size_t __bytes, size_t __alignment) override; // key function diff --git a/lib/libcxx/include/__memory_resource/polymorphic_allocator.h b/lib/libcxx/include/__memory_resource/polymorphic_allocator.h index 
b95c6a37c5..b01541fa0e 100644 --- a/lib/libcxx/include/__memory_resource/polymorphic_allocator.h +++ b/lib/libcxx/include/__memory_resource/polymorphic_allocator.h @@ -18,6 +18,7 @@ #include <__new/exceptions.h> #include <__new/placement_new_delete.h> #include <__utility/exception_guard.h> +#include <__utility/piecewise_construct.h> #include #include @@ -50,7 +51,9 @@ public: _LIBCPP_HIDE_FROM_ABI polymorphic_allocator() noexcept : __res_(std::pmr::get_default_resource()) {} - _LIBCPP_HIDE_FROM_ABI polymorphic_allocator(memory_resource* __r) noexcept : __res_(__r) {} + _LIBCPP_HIDE_FROM_ABI polymorphic_allocator(memory_resource* _LIBCPP_DIAGNOSE_NULLPTR __r) noexcept : __res_(__r) { + _LIBCPP_ASSERT_NON_NULL(__r, "Attempted to pass a nullptr resource to polymorphic_alloator"); + } _LIBCPP_HIDE_FROM_ABI polymorphic_allocator(const polymorphic_allocator&) = default; @@ -133,10 +136,10 @@ public: piecewise_construct, __transform_tuple(typename __uses_alloc_ctor< _T1, polymorphic_allocator&, _Args1... >::type(), std::move(__x), - typename __make_tuple_indices::type{}), + make_index_sequence()), __transform_tuple(typename __uses_alloc_ctor< _T2, polymorphic_allocator&, _Args2... 
>::type(), std::move(__y), - typename __make_tuple_indices::type{})); + make_index_sequence())); } template @@ -170,11 +173,13 @@ public: __p->~_Tp(); } - _LIBCPP_HIDE_FROM_ABI polymorphic_allocator select_on_container_copy_construction() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI polymorphic_allocator select_on_container_copy_construction() const noexcept { return polymorphic_allocator(); } - _LIBCPP_HIDE_FROM_ABI memory_resource* resource() const noexcept { return __res_; } + [[nodiscard, __gnu__::__returns_nonnull__]] _LIBCPP_HIDE_FROM_ABI memory_resource* resource() const noexcept { + return __res_; + } _LIBCPP_HIDE_FROM_ABI friend bool operator==(const polymorphic_allocator& __lhs, const polymorphic_allocator& __rhs) noexcept { @@ -192,20 +197,20 @@ public: private: template _LIBCPP_HIDE_FROM_ABI tuple<_Args&&...> - __transform_tuple(integral_constant, tuple<_Args...>&& __t, __tuple_indices<_Is...>) { + __transform_tuple(integral_constant, tuple<_Args...>&& __t, index_sequence<_Is...>) { return std::forward_as_tuple(std::get<_Is>(std::move(__t))...); } template _LIBCPP_HIDE_FROM_ABI tuple - __transform_tuple(integral_constant, tuple<_Args...>&& __t, __tuple_indices<_Is...>) { + __transform_tuple(integral_constant, tuple<_Args...>&& __t, index_sequence<_Is...>) { using _Tup = tuple; return _Tup(allocator_arg, *this, std::get<_Is>(std::move(__t))...); } template _LIBCPP_HIDE_FROM_ABI tuple<_Args&&..., polymorphic_allocator&> - __transform_tuple(integral_constant, tuple<_Args...>&& __t, __tuple_indices<_Is...>) { + __transform_tuple(integral_constant, tuple<_Args...>&& __t, index_sequence<_Is...>) { using _Tup = tuple<_Args&&..., polymorphic_allocator&>; return _Tup(std::get<_Is>(std::move(__t))..., *this); } diff --git a/lib/libcxx/include/__memory_resource/pool_options.h b/lib/libcxx/include/__memory_resource/pool_options.h index 324b8aaa85..fd20ced567 100644 --- a/lib/libcxx/include/__memory_resource/pool_options.h +++ 
b/lib/libcxx/include/__memory_resource/pool_options.h @@ -24,7 +24,7 @@ namespace pmr { // [mem.res.pool.options] -struct _LIBCPP_EXPORTED_FROM_ABI pool_options { +struct pool_options { size_t max_blocks_per_chunk = 0; size_t largest_required_pool_block = 0; }; diff --git a/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h b/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h index bcc1ac4a17..1c929675bb 100644 --- a/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h +++ b/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h @@ -56,9 +56,11 @@ public: __unsync_.release(); } - _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __unsync_.upstream_resource(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { + return __unsync_.upstream_resource(); + } - _LIBCPP_HIDE_FROM_ABI pool_options options() const { return __unsync_.options(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI pool_options options() const { return __unsync_.options(); } protected: _LIBCPP_HIDE_FROM_ABI_VIRTUAL void* do_allocate(size_t __bytes, size_t __align) override { diff --git a/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h b/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h index 92da16c559..89198a1b7c 100644 --- a/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h +++ b/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h @@ -76,7 +76,7 @@ public: void release(); - _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; } [[__gnu__::__pure__]] pool_options options() const; diff --git a/lib/libcxx/include/__mutex/mutex.h b/lib/libcxx/include/__mutex/mutex.h index 68c8842b35..e9cedf8db1 100644 --- a/lib/libcxx/include/__mutex/mutex.h +++ b/lib/libcxx/include/__mutex/mutex.h @@ -37,11 +37,11 @@ 
public: # endif _LIBCPP_ACQUIRE_CAPABILITY() void lock(); - _LIBCPP_TRY_ACQUIRE_CAPABILITY(true) bool try_lock() _NOEXCEPT; + [[__nodiscard__]] _LIBCPP_TRY_ACQUIRE_CAPABILITY(true) bool try_lock() _NOEXCEPT; _LIBCPP_RELEASE_CAPABILITY void unlock() _NOEXCEPT; typedef __libcpp_mutex_t* native_handle_type; - _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; } }; static_assert(is_nothrow_default_constructible::value, "the default constructor for std::mutex must be nothrow"); diff --git a/lib/libcxx/include/__mutex/once_flag.h b/lib/libcxx/include/__mutex/once_flag.h index 3306449955..ad15b2eb6d 100644 --- a/lib/libcxx/include/__mutex/once_flag.h +++ b/lib/libcxx/include/__mutex/once_flag.h @@ -10,12 +10,11 @@ #define _LIBCPP___MUTEX_ONCE_FLAG_H #include <__config> -#include <__functional/invoke.h> #include <__memory/addressof.h> -#include <__memory/shared_count.h> // __libcpp_acquire_load -#include <__tuple/tuple_indices.h> #include <__tuple/tuple_size.h> +#include <__type_traits/invoke.h> #include <__utility/forward.h> +#include <__utility/integer_sequence.h> #include <__utility/move.h> #include #ifndef _LIBCPP_CXX03_LANG @@ -88,14 +87,9 @@ public: _LIBCPP_HIDE_FROM_ABI explicit __call_once_param(_Fp& __f) : __f_(__f) {} _LIBCPP_HIDE_FROM_ABI void operator()() { - typedef typename __make_tuple_indices::value, 1>::type _Index; - __execute(_Index()); - } - -private: - template - _LIBCPP_HIDE_FROM_ABI void __execute(__tuple_indices<_Indices...>) { - std::__invoke(std::get<0>(std::move(__f_)), std::get<_Indices>(std::move(__f_))...); + [&](__index_sequence<_Indices...>) -> void { + std::__invoke(std::get<_Indices>(std::move(__f_))...); + }(__make_index_sequence::value>()); } }; @@ -121,6 +115,15 @@ void _LIBCPP_HIDE_FROM_ABI __call_once_proxy(void* __vp) { _LIBCPP_EXPORTED_FROM_ABI void __call_once(volatile once_flag::_State_type&, void*, void 
(*)(void*)); +template +inline _LIBCPP_HIDE_FROM_ABI _ValueType __libcpp_acquire_load(_ValueType const* __value) { +#if _LIBCPP_HAS_THREADS + return __atomic_load_n(__value, __ATOMIC_ACQUIRE); +#else + return *__value; +#endif +} + #ifndef _LIBCPP_CXX03_LANG template diff --git a/lib/libcxx/include/__mutex/tag_types.h b/lib/libcxx/include/__mutex/tag_types.h index 2b2dd58ee4..36b1a3d92b 100644 --- a/lib/libcxx/include/__mutex/tag_types.h +++ b/lib/libcxx/include/__mutex/tag_types.h @@ -17,15 +17,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD -struct _LIBCPP_EXPORTED_FROM_ABI defer_lock_t { +struct defer_lock_t { explicit defer_lock_t() = default; }; -struct _LIBCPP_EXPORTED_FROM_ABI try_to_lock_t { +struct try_to_lock_t { explicit try_to_lock_t() = default; }; -struct _LIBCPP_EXPORTED_FROM_ABI adopt_lock_t { +struct adopt_lock_t { explicit adopt_lock_t() = default; }; diff --git a/lib/libcxx/include/__mutex/unique_lock.h b/lib/libcxx/include/__mutex/unique_lock.h index aea93eb9b8..6968922639 100644 --- a/lib/libcxx/include/__mutex/unique_lock.h +++ b/lib/libcxx/include/__mutex/unique_lock.h @@ -15,6 +15,7 @@ #include <__memory/addressof.h> #include <__mutex/tag_types.h> #include <__system_error/throw_system_error.h> +#include <__utility/move.h> #include <__utility/swap.h> #include @@ -22,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template @@ -74,13 +78,8 @@ public: } _LIBCPP_HIDE_FROM_ABI unique_lock& operator=(unique_lock&& __u) _NOEXCEPT { - if (__owns_) - __m_->unlock(); - - __m_ = __u.__m_; - __owns_ = __u.__owns_; - __u.__m_ = nullptr; - __u.__owns_ = false; + if (this != std::addressof(__u)) + unique_lock(std::move(__u)).swap(*this); return *this; } @@ -170,4 +169,6 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(unique_lock<_Mutex>& __x, unique_lock<_Mu _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___MUTEX_UNIQUE_LOCK_H diff --git a/lib/libcxx/include/__new/align_val_t.h 
b/lib/libcxx/include/__new/align_val_t.h index 03ab7cb143..d8ce528334 100644 --- a/lib/libcxx/include/__new/align_val_t.h +++ b/lib/libcxx/include/__new/align_val_t.h @@ -16,6 +16,12 @@ # pragma GCC system_header #endif +// defines its own std::align_val_t type, +// which we use in order to be ABI-compatible with other STLs on Windows. +#if _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION && defined(_LIBCPP_ABI_VCRUNTIME) +# include +#endif + _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD #if _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION && !defined(_LIBCPP_ABI_VCRUNTIME) # ifndef _LIBCPP_CXX03_LANG diff --git a/lib/libcxx/include/__new/allocate.h b/lib/libcxx/include/__new/allocate.h index 9bfe19aedb..b9bc2e1a50 100644 --- a/lib/libcxx/include/__new/allocate.h +++ b/lib/libcxx/include/__new/allocate.h @@ -13,7 +13,6 @@ #include <__cstddef/max_align_t.h> #include <__cstddef/size_t.h> #include <__new/align_val_t.h> -#include <__new/global_new_delete.h> // for _LIBCPP_HAS_SIZED_DEALLOCATION #include <__type_traits/type_identity.h> #include <__utility/element_count.h> @@ -43,7 +42,7 @@ __libcpp_allocate(__element_count __n, [[__maybe_unused__]] size_t __align = _LI return static_cast<_Tp*>(__builtin_operator_new(__size)); } -#if _LIBCPP_HAS_SIZED_DEALLOCATION +#if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L # define _LIBCPP_ONLY_IF_SIZED_DEALLOCATION(...) __VA_ARGS__ #else # define _LIBCPP_ONLY_IF_SIZED_DEALLOCATION(...) /* nothing */ diff --git a/lib/libcxx/include/__new/exceptions.h b/lib/libcxx/include/__new/exceptions.h index 86951818b7..1aadc23120 100644 --- a/lib/libcxx/include/__new/exceptions.h +++ b/lib/libcxx/include/__new/exceptions.h @@ -17,6 +17,12 @@ # pragma GCC system_header #endif +// defines its own std::bad_alloc type, +// which we use in order to be ABI-compatible with other STLs on Windows. 
+#if defined(_LIBCPP_ABI_VCRUNTIME) +# include +#endif + _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD #if !defined(_LIBCPP_ABI_VCRUNTIME) @@ -26,7 +32,7 @@ public: _LIBCPP_HIDE_FROM_ABI bad_alloc(const bad_alloc&) _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI bad_alloc& operator=(const bad_alloc&) _NOEXCEPT = default; ~bad_alloc() _NOEXCEPT override; - const char* what() const _NOEXCEPT override; + [[__nodiscard__]] const char* what() const _NOEXCEPT override; }; class _LIBCPP_EXPORTED_FROM_ABI bad_array_new_length : public bad_alloc { @@ -35,7 +41,7 @@ public: _LIBCPP_HIDE_FROM_ABI bad_array_new_length(const bad_array_new_length&) _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI bad_array_new_length& operator=(const bad_array_new_length&) _NOEXCEPT = default; ~bad_array_new_length() _NOEXCEPT override; - const char* what() const _NOEXCEPT override; + [[__nodiscard__]] const char* what() const _NOEXCEPT override; }; #elif defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS == 0 // !_LIBCPP_ABI_VCRUNTIME diff --git a/lib/libcxx/include/__new/global_new_delete.h b/lib/libcxx/include/__new/global_new_delete.h index 96510ab56b..f31bac3730 100644 --- a/lib/libcxx/include/__new/global_new_delete.h +++ b/lib/libcxx/include/__new/global_new_delete.h @@ -12,7 +12,6 @@ #include <__config> #include <__cstddef/size_t.h> #include <__new/align_val_t.h> -#include <__new/exceptions.h> #include <__new/nothrow_t.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -25,12 +24,6 @@ # define _THROW_BAD_ALLOC #endif -#if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L -# define _LIBCPP_HAS_SIZED_DEALLOCATION 1 -#else -# define _LIBCPP_HAS_SIZED_DEALLOCATION 0 -#endif - #if defined(_LIBCPP_ABI_VCRUNTIME) # include #else @@ -39,7 +32,7 @@ _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_t&) _NOEXCEPT; -# if _LIBCPP_HAS_SIZED_DEALLOCATION +# if 
defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _NOEXCEPT; # endif @@ -48,7 +41,7 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _ _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, const std::nothrow_t&) _NOEXCEPT; -# if _LIBCPP_HAS_SIZED_DEALLOCATION +# if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz) _NOEXCEPT; # endif @@ -58,7 +51,7 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz) operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; -# if _LIBCPP_HAS_SIZED_DEALLOCATION +# if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; # endif @@ -68,7 +61,7 @@ operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; -# if _LIBCPP_HAS_SIZED_DEALLOCATION +# if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; # endif # endif diff --git 
a/lib/libcxx/include/__new/interference_size.h b/lib/libcxx/include/__new/interference_size.h index d326c43a33..591d0ab405 100644 --- a/lib/libcxx/include/__new/interference_size.h +++ b/lib/libcxx/include/__new/interference_size.h @@ -20,13 +20,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 -# if defined(__GCC_DESTRUCTIVE_SIZE) && defined(__GCC_CONSTRUCTIVE_SIZE) - inline constexpr size_t hardware_destructive_interference_size = __GCC_DESTRUCTIVE_SIZE; inline constexpr size_t hardware_constructive_interference_size = __GCC_CONSTRUCTIVE_SIZE; -# endif // defined(__GCC_DESTRUCTIVE_SIZE) && defined(__GCC_CONSTRUCTIVE_SIZE) - #endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__new/launder.h b/lib/libcxx/include/__new/launder.h index 83d8001591..886f614eed 100644 --- a/lib/libcxx/include/__new/launder.h +++ b/lib/libcxx/include/__new/launder.h @@ -10,8 +10,6 @@ #define _LIBCPP___NEW_LAUNDER_H #include <__config> -#include <__type_traits/is_function.h> -#include <__type_traits/is_void.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -20,15 +18,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT { - static_assert(!(is_function<_Tp>::value), "can't launder functions"); - static_assert(!is_void<_Tp>::value, "can't launder cv-void"); + // The compiler diagnoses misuses of __builtin_launder, so we don't need to add any static_asserts + // to implement the Mandates. 
return __builtin_launder(__p); } #if _LIBCPP_STD_VER >= 17 template [[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp* launder(_Tp* __p) noexcept { - return std::__launder(__p); + return __builtin_launder(__p); } #endif _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__new/nothrow_t.h b/lib/libcxx/include/__new/nothrow_t.h index a286bf7af6..a099772123 100644 --- a/lib/libcxx/include/__new/nothrow_t.h +++ b/lib/libcxx/include/__new/nothrow_t.h @@ -19,7 +19,7 @@ # include #else _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD -struct _LIBCPP_EXPORTED_FROM_ABI nothrow_t { +struct nothrow_t { explicit nothrow_t() = default; }; extern _LIBCPP_EXPORTED_FROM_ABI const nothrow_t nothrow; diff --git a/lib/libcxx/include/__numeric/gcd_lcm.h b/lib/libcxx/include/__numeric/gcd_lcm.h index 95df54dc06..5ab870fa73 100644 --- a/lib/libcxx/include/__numeric/gcd_lcm.h +++ b/lib/libcxx/include/__numeric/gcd_lcm.h @@ -33,28 +33,26 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 -template ::value> -struct __ct_abs; - -template -struct __ct_abs<_Result, _Source, true> { - constexpr _LIBCPP_HIDE_FROM_ABI _Result operator()(_Source __t) const noexcept { +template +constexpr _LIBCPP_HIDE_FROM_ABI _Result __abs_in_type(_Source __t) noexcept { + if constexpr (is_signed_v<_Source>) { if (__t >= 0) return __t; if (__t == numeric_limits<_Source>::min()) return -static_cast<_Result>(__t); return -__t; + } else { + return __t; } -}; +} -template -struct __ct_abs<_Result, _Source, false> { - constexpr _LIBCPP_HIDE_FROM_ABI _Result operator()(_Source __t) const noexcept { return __t; } -}; - -template -constexpr _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) { - static_assert(!is_signed<_Tp>::value, ""); +template +constexpr _LIBCPP_HIDE_FROM_ABI common_type_t<_Tp, _Up> gcd(_Tp __m, _Up __n) { + static_assert(is_integral<_Tp>::value && is_integral<_Up>::value, "Arguments to gcd must be integer types"); + static_assert(!is_same<__remove_cv_t<_Tp>, bool>::value, "First argument to gcd 
cannot be bool"); + static_assert(!is_same<__remove_cv_t<_Up>, bool>::value, "Second argument to gcd cannot be bool"); + using _Rp = common_type_t<_Tp, _Up>; + using _Wp = make_unsigned_t<_Rp>; // Using Binary GCD algorithm https://en.wikipedia.org/wiki/Binary_GCD_algorithm, based on an implementation // from https://lemire.me/blog/2024/04/13/greatest-common-divisor-the-extended-euclidean-algorithm-and-speed/ @@ -67,22 +65,25 @@ constexpr _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) { // // And standard gcd algorithm where instead of modulo, minus is used. + auto __a = static_cast<_Wp>(std::__abs_in_type<_Rp>(__m)); + auto __b = static_cast<_Wp>(std::__abs_in_type<_Rp>(__n)); + if (__a < __b) { - _Tp __tmp = __b; + _Wp __tmp = __b; __b = __a; __a = __tmp; } if (__b == 0) - return __a; + return static_cast<_Rp>(__a); __a %= __b; // Make both argument of the same size, and early result in the easy case. if (__a == 0) - return __b; + return static_cast<_Rp>(__b); - _Tp __c = __a | __b; + _Wp __c = __a | __b; int __shift = std::__countr_zero(__c); __a >>= std::__countr_zero(__a); do { - _Tp __t = __b >> std::__countr_zero(__b); + _Wp __t = __b >> std::__countr_zero(__b); if (__a > __t) { __b = __a - __t; __a = __t; @@ -90,18 +91,7 @@ constexpr _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) { __b = __t - __a; } } while (__b != 0); - return __a << __shift; -} - -template -constexpr _LIBCPP_HIDE_FROM_ABI common_type_t<_Tp, _Up> gcd(_Tp __m, _Up __n) { - static_assert(is_integral<_Tp>::value && is_integral<_Up>::value, "Arguments to gcd must be integer types"); - static_assert(!is_same<__remove_cv_t<_Tp>, bool>::value, "First argument to gcd cannot be bool"); - static_assert(!is_same<__remove_cv_t<_Up>, bool>::value, "Second argument to gcd cannot be bool"); - using _Rp = common_type_t<_Tp, _Up>; - using _Wp = make_unsigned_t<_Rp>; - return static_cast<_Rp>( - std::__gcd(static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)), static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n)))); + return 
static_cast<_Rp>(__a << __shift); } template @@ -113,8 +103,8 @@ constexpr _LIBCPP_HIDE_FROM_ABI common_type_t<_Tp, _Up> lcm(_Tp __m, _Up __n) { return 0; using _Rp = common_type_t<_Tp, _Up>; - _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / std::gcd(__m, __n); - _Rp __val2 = __ct_abs<_Rp, _Up>()(__n); + _Rp __val1 = std::__abs_in_type<_Rp>(__m) / std::gcd(__m, __n); + _Rp __val2 = std::__abs_in_type<_Rp>(__n); _Rp __res; [[maybe_unused]] bool __overflow = __builtin_mul_overflow(__val1, __val2, std::addressof(__res)); _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__overflow, "Overflow in lcm"); diff --git a/lib/libcxx/include/__numeric/midpoint.h b/lib/libcxx/include/__numeric/midpoint.h index 2ba80e5cca..d8e73ab8ca 100644 --- a/lib/libcxx/include/__numeric/midpoint.h +++ b/lib/libcxx/include/__numeric/midpoint.h @@ -12,16 +12,13 @@ #include <__config> #include <__cstddef/ptrdiff_t.h> -#include <__type_traits/enable_if.h> #include <__type_traits/is_floating_point.h> #include <__type_traits/is_integral.h> -#include <__type_traits/is_null_pointer.h> #include <__type_traits/is_object.h> -#include <__type_traits/is_pointer.h> #include <__type_traits/is_same.h> #include <__type_traits/is_void.h> #include <__type_traits/make_unsigned.h> -#include <__type_traits/remove_pointer.h> +#include <__type_traits/remove_cv.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -35,8 +32,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -_LIBCPP_HIDE_FROM_ABI constexpr enable_if_t && !is_same_v && !is_null_pointer_v<_Tp>, _Tp> -midpoint(_Tp __a, _Tp __b) noexcept _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { + requires(is_integral_v<_Tp> && !is_same_v, bool>) +[[nodiscard]] +_LIBCPP_HIDE_FROM_ABI constexpr _Tp midpoint(_Tp __a, _Tp __b) noexcept _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { using _Up = make_unsigned_t<_Tp>; constexpr _Up __bitshift = numeric_limits<_Up>::digits - 1; @@ -48,23 +46,20 @@ midpoint(_Tp __a, _Tp __b) noexcept 
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK return __a + __half_diff; } -template && !is_void_v<_Tp> && (sizeof(_Tp) > 0), int> = 0> -_LIBCPP_HIDE_FROM_ABI constexpr _Tp* midpoint(_Tp* __a, _Tp* __b) noexcept { +template + requires(is_object_v<_Tp> && (sizeof(_Tp) > 0)) +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* midpoint(_Tp* __a, _Tp* __b) noexcept { return __a + std::midpoint(ptrdiff_t(0), __b - __a); } -template -_LIBCPP_HIDE_FROM_ABI constexpr int __sign(_Tp __val) { - return (_Tp(0) < __val) - (__val < _Tp(0)); -} - template _LIBCPP_HIDE_FROM_ABI constexpr _Fp __fp_abs(_Fp __f) { return __f >= 0 ? __f : -__f; } template -_LIBCPP_HIDE_FROM_ABI constexpr enable_if_t, _Fp> midpoint(_Fp __a, _Fp __b) noexcept { + requires(is_floating_point_v<_Fp>) +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Fp midpoint(_Fp __a, _Fp __b) noexcept { constexpr _Fp __lo = numeric_limits<_Fp>::min() * 2; constexpr _Fp __hi = numeric_limits<_Fp>::max() / 2; diff --git a/lib/libcxx/include/__numeric/pstl.h b/lib/libcxx/include/__numeric/pstl.h index 22d971ac3b..fe7b2cc7a8 100644 --- a/lib/libcxx/include/__numeric/pstl.h +++ b/lib/libcxx/include/__numeric/pstl.h @@ -70,7 +70,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_HIDE_FROM_ABI __iter_value_type<_ForwardIterator> +_LIBCPP_HIDE_FROM_ABI __iterator_value_type<_ForwardIterator> reduce(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "reduce requires ForwardIterators"); using _Implementation = __pstl::__dispatch<__pstl::__reduce, __pstl::__current_configuration, _RawPolicy>; @@ -78,7 +78,7 @@ reduce(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator _ std::forward<_ExecutionPolicy>(__policy), std::move(__first), std::move(__last), - __iter_value_type<_ForwardIterator>(), + __iterator_value_type<_ForwardIterator>(), plus{}); } diff --git a/lib/libcxx/include/__numeric/saturation_arithmetic.h 
b/lib/libcxx/include/__numeric/saturation_arithmetic.h index 9bd3af12c9..4491bab2b1 100644 --- a/lib/libcxx/include/__numeric/saturation_arithmetic.h +++ b/lib/libcxx/include/__numeric/saturation_arithmetic.h @@ -30,6 +30,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <__signed_or_unsigned_integer _Tp> _LIBCPP_HIDE_FROM_ABI constexpr _Tp __add_sat(_Tp __x, _Tp __y) noexcept { +# if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 2101 + return __builtin_elementwise_add_sat(__x, __y); +# else if (_Tp __sum; !__builtin_add_overflow(__x, __y, std::addressof(__sum))) return __sum; // Handle overflow @@ -44,10 +47,14 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Tp __add_sat(_Tp __x, _Tp __y) noexcept { // Overflows if (x < 0 && y < 0) return std::numeric_limits<_Tp>::min(); } +# endif } template <__signed_or_unsigned_integer _Tp> _LIBCPP_HIDE_FROM_ABI constexpr _Tp __sub_sat(_Tp __x, _Tp __y) noexcept { +# if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 2101 + return __builtin_elementwise_sub_sat(__x, __y); +# else if (_Tp __sub; !__builtin_sub_overflow(__x, __y, std::addressof(__sub))) return __sub; // Handle overflow @@ -63,6 +70,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Tp __sub_sat(_Tp __x, _Tp __y) noexcept { // Overflows if (x < 0 && y > 0) return std::numeric_limits<_Tp>::min(); } +# endif } template <__signed_or_unsigned_integer _Tp> @@ -113,27 +121,27 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Rp __saturate_cast(_Tp __x) noexcept { #if _LIBCPP_STD_VER >= 26 template <__signed_or_unsigned_integer _Tp> -_LIBCPP_HIDE_FROM_ABI constexpr _Tp add_sat(_Tp __x, _Tp __y) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp add_sat(_Tp __x, _Tp __y) noexcept { return std::__add_sat(__x, __y); } template <__signed_or_unsigned_integer _Tp> -_LIBCPP_HIDE_FROM_ABI constexpr _Tp sub_sat(_Tp __x, _Tp __y) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp sub_sat(_Tp __x, _Tp __y) noexcept { return std::__sub_sat(__x, __y); } template <__signed_or_unsigned_integer _Tp> 
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp mul_sat(_Tp __x, _Tp __y) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp mul_sat(_Tp __x, _Tp __y) noexcept { return std::__mul_sat(__x, __y); } template <__signed_or_unsigned_integer _Tp> -_LIBCPP_HIDE_FROM_ABI constexpr _Tp div_sat(_Tp __x, _Tp __y) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp div_sat(_Tp __x, _Tp __y) noexcept { return std::__div_sat(__x, __y); } template <__signed_or_unsigned_integer _Rp, __signed_or_unsigned_integer _Tp> -_LIBCPP_HIDE_FROM_ABI constexpr _Rp saturate_cast(_Tp __x) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Rp saturate_cast(_Tp __x) noexcept { return std::__saturate_cast<_Rp>(__x); } diff --git a/lib/libcxx/include/__ostream/basic_ostream.h b/lib/libcxx/include/__ostream/basic_ostream.h index effeef491f..62770be72f 100644 --- a/lib/libcxx/include/__ostream/basic_ostream.h +++ b/lib/libcxx/include/__ostream/basic_ostream.h @@ -53,7 +53,7 @@ public: typedef typename traits_type::off_type off_type; // 27.7.2.2 Constructor/destructor: - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 explicit basic_ostream(basic_streambuf* __sb) { + inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 explicit basic_ostream(basic_streambuf* __sb) { this->init(__sb); } ~basic_ostream() override; @@ -67,7 +67,7 @@ protected: // 27.7.2.3 Assign/swap inline _LIBCPP_HIDE_FROM_ABI basic_ostream& operator=(basic_ostream&& __rhs); - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_ostream& __rhs) { + inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void swap(basic_ostream& __rhs) { basic_ios::swap(__rhs); } @@ -76,17 +76,17 @@ public: class sentry; // 27.7.2.6 Formatted output: - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& operator<<(basic_ostream& (*__pf)(basic_ostream&)) { + inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& operator<<(basic_ostream& (*__pf)(basic_ostream&)) { return __pf(*this); } - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& + inline 
_LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& operator<<(basic_ios& (*__pf)(basic_ios&)) { __pf(*this); return *this; } - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& operator<<(ios_base& (*__pf)(ios_base&)) { + inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& operator<<(ios_base& (*__pf)(ios_base&)) { __pf(*this); return *this; } @@ -174,9 +174,9 @@ public: basic_ostream& flush(); // 27.7.2.5 seeks: - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 pos_type tellp(); - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& seekp(pos_type __pos); - inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& seekp(off_type __off, ios_base::seekdir __dir); + [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 pos_type tellp(); + inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& seekp(pos_type __pos); + inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& seekp(off_type __off, ios_base::seekdir __dir); protected: _LIBCPP_HIDE_FROM_ABI basic_ostream() {} // extension, intentially does not initialize diff --git a/lib/libcxx/include/__pstl/backends/default.h b/lib/libcxx/include/__pstl/backends/default.h index 3672bbf60a..43b1f1ce38 100644 --- a/lib/libcxx/include/__pstl/backends/default.h +++ b/lib/libcxx/include/__pstl/backends/default.h @@ -102,7 +102,7 @@ struct __find<__default_backend_tag, _ExecutionPolicy> { operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept { using _FindIf = __dispatch<__find_if, __current_configuration, _ExecutionPolicy>; return _FindIf()( - __policy, std::move(__first), std::move(__last), [&](__iter_reference<_ForwardIterator> __element) { + __policy, std::move(__first), std::move(__last), [&](__iterator_reference<_ForwardIterator> __element) { return __element == __value; }); } @@ -137,7 +137,7 @@ struct __all_of<__default_backend_tag, _ExecutionPolicy> { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional operator()(_Policy&& __policy, _ForwardIterator __first, 
_ForwardIterator __last, _Pred&& __pred) const noexcept { using _AnyOf = __dispatch<__any_of, __current_configuration, _ExecutionPolicy>; - auto __res = _AnyOf()(__policy, __first, __last, [&](__iter_reference<_ForwardIterator> __value) { + auto __res = _AnyOf()(__policy, __first, __last, [&](__iterator_reference<_ForwardIterator> __value) { return !__pred(__value); }); if (!__res) @@ -204,7 +204,7 @@ struct __fill<__default_backend_tag, _ExecutionPolicy> { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty> operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp const& __value) const noexcept { using _ForEach = __dispatch<__for_each, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _ForEach()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) { __element = __value; }); } }; @@ -233,7 +233,7 @@ struct __replace<__default_backend_tag, _ExecutionPolicy> { operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp const& __old, _Tp const& __new) const noexcept { using _ReplaceIf = __dispatch<__replace_if, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _ReplaceIf()( __policy, std::move(__first), std::move(__last), [&](_Ref __element) { return __element == __old; }, __new); } @@ -246,7 +246,7 @@ struct __replace_if<__default_backend_tag, _ExecutionPolicy> { _Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Pred&& __pred, _Tp const& __new_value) const noexcept { using _ForEach = __dispatch<__for_each, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _ForEach()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) { if (__pred(__element)) 
__element = __new_value; @@ -260,7 +260,7 @@ struct __generate<__default_backend_tag, _ExecutionPolicy> { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty> operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Generator&& __gen) const noexcept { using _ForEach = __dispatch<__for_each, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _ForEach()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) { __element = __gen(); }); } }; @@ -271,7 +271,7 @@ struct __generate_n<__default_backend_tag, _ExecutionPolicy> { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty> operator()(_Policy&& __policy, _ForwardIterator __first, _Size __n, _Generator&& __gen) const noexcept { using _ForEachN = __dispatch<__for_each_n, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _ForEachN()(__policy, std::move(__first), __n, [&](_Ref __element) { __element = __gen(); }); } }; @@ -295,11 +295,11 @@ struct __sort<__default_backend_tag, _ExecutionPolicy> { template struct __count_if<__default_backend_tag, _ExecutionPolicy> { template - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iter_diff_t<_ForwardIterator>> operator()( + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iterator_difference_type<_ForwardIterator>> operator()( _Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate&& __pred) const noexcept { using _TransformReduce = __dispatch<__transform_reduce, __current_configuration, _ExecutionPolicy>; - using _DiffT = __iter_diff_t<_ForwardIterator>; - using _Ref = __iter_reference<_ForwardIterator>; + using _DiffT = __iterator_difference_type<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _TransformReduce()( __policy, std::move(__first), std::move(__last), _DiffT{}, 
std::plus{}, [&](_Ref __element) -> _DiffT { return __pred(__element) ? _DiffT(1) : _DiffT(0); @@ -310,10 +310,10 @@ struct __count_if<__default_backend_tag, _ExecutionPolicy> { template struct __count<__default_backend_tag, _ExecutionPolicy> { template - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iter_diff_t<_ForwardIterator>> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iterator_difference_type<_ForwardIterator>> operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp const& __value) const noexcept { using _CountIf = __dispatch<__count_if, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _CountIf()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) -> bool { return __element == __value; }); @@ -402,7 +402,7 @@ struct __replace_copy_if<__default_backend_tag, _ExecutionPolicy> { _Pred&& __pred, _Tp const& __new_value) const noexcept { using _Transform = __dispatch<__transform, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; auto __res = _Transform()(__policy, std::move(__first), std::move(__last), std::move(__out_it), [&](_Ref __element) { return __pred(__element) ? 
__new_value : __element; @@ -424,7 +424,7 @@ struct __replace_copy<__default_backend_tag, _ExecutionPolicy> { _Tp const& __old_value, _Tp const& __new_value) const noexcept { using _ReplaceCopyIf = __dispatch<__replace_copy_if, __current_configuration, _ExecutionPolicy>; - using _Ref = __iter_reference<_ForwardIterator>; + using _Ref = __iterator_reference<_ForwardIterator>; return _ReplaceCopyIf()( __policy, std::move(__first), diff --git a/lib/libcxx/include/__pstl/backends/libdispatch.h b/lib/libcxx/include/__pstl/backends/libdispatch.h index a640a40352..88d4231d29 100644 --- a/lib/libcxx/include/__pstl/backends/libdispatch.h +++ b/lib/libcxx/include/__pstl/backends/libdispatch.h @@ -269,7 +269,7 @@ struct __cpu_traits<__libdispatch_backend_tag> { return __empty{}; } - using _Value = __iter_value_type<_RandomAccessIterator>; + using _Value = __iterator_value_type<_RandomAccessIterator>; auto __destroy = [__size](_Value* __ptr) { std::destroy_n(__ptr, __size); @@ -282,7 +282,7 @@ struct __cpu_traits<__libdispatch_backend_tag> { // Initialize all elements to a moved-from state // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928 std::__construct_at(__values.get(), std::move(*__first)); - for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) { + for (__iterator_difference_type<_RandomAccessIterator> __i = 1; __i != __size; ++__i) { std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1])); } *__first = std::move(__values.get()[__size - 1]); diff --git a/lib/libcxx/include/__pstl/cpu_algos/find_if.h b/lib/libcxx/include/__pstl/cpu_algos/find_if.h index ebb4ecb4a0..aae64b66eb 100644 --- a/lib/libcxx/include/__pstl/cpu_algos/find_if.h +++ b/lib/libcxx/include/__pstl/cpu_algos/find_if.h @@ -119,7 +119,7 @@ struct __cpu_parallel_find_if { true); } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && 
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { - using __diff_t = __iter_diff_t<_ForwardIterator>; + using __diff_t = __iterator_difference_type<_ForwardIterator>; return __pstl::__simd_first<_Backend>( __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { return __pred(__iter[__i]); diff --git a/lib/libcxx/include/__pstl/cpu_algos/transform.h b/lib/libcxx/include/__pstl/cpu_algos/transform.h index 979121be8c..30d117d754 100644 --- a/lib/libcxx/include/__pstl/cpu_algos/transform.h +++ b/lib/libcxx/include/__pstl/cpu_algos/transform.h @@ -84,9 +84,8 @@ struct __cpu_parallel_transform { __first, __last - __first, __result, - [&](__iter_reference<_ForwardIterator> __in_value, __iter_reference<_ForwardOutIterator> __out_value) { - __out_value = __op(__in_value); - }); + [&](__iterator_reference<_ForwardIterator> __in_value, + __iterator_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in_value); }); } else { return std::transform(__first, __last, __result, __op); } @@ -138,9 +137,9 @@ struct __cpu_parallel_transform_binary { __last1 - __first1, __first2, __result, - [&](__iter_reference<_ForwardIterator1> __in1, - __iter_reference<_ForwardIterator2> __in2, - __iter_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in1, __in2); }); + [&](__iterator_reference<_ForwardIterator1> __in1, + __iterator_reference<_ForwardIterator2> __in2, + __iterator_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in1, __in2); }); } else { return std::transform(__first1, __last1, __first2, __result, __op); } diff --git a/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h index abd9d42a6f..edfb28b446 100644 --- a/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h +++ b/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h @@ -148,9 +148,10 @@ struct __cpu_parallel_transform_reduce_binary { 
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { return __pstl::__simd_transform_reduce<_Backend>( - __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) { - return __transform(__first1[__i], __first2[__i]); - }); + __last1 - __first1, + std::move(__init), + std::move(__reduce), + [&](__iterator_difference_type<_ForwardIterator1> __i) { return __transform(__first1[__i], __first2[__i]); }); } else { return std::transform_reduce( std::move(__first1), @@ -200,7 +201,7 @@ struct __cpu_parallel_transform_reduce { __last - __first, std::move(__init), std::move(__reduce), - [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); }); + [=, &__transform](__iterator_difference_type<_ForwardIterator> __i) { return __transform(__first[__i]); }); } else { return std::transform_reduce( std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform)); diff --git a/lib/libcxx/include/__random/binomial_distribution.h b/lib/libcxx/include/__random/binomial_distribution.h index b4b4340827..0712e4ef4a 100644 --- a/lib/libcxx/include/__random/binomial_distribution.h +++ b/lib/libcxx/include/__random/binomial_distribution.h @@ -97,13 +97,19 @@ public: } }; -// The LLVM C library provides this with conflicting `noexcept` attributes. -#if !defined(_LIBCPP_MSVCRT_LIKE) && !defined(__LLVM_LIBC__) -extern "C" double lgamma_r(double, int*); +// Some libc declares the math functions to be `noexcept`. 
+#if _LIBCPP_GLIBC_PREREQ(2, 8) || defined(__LLVM_LIBC__) +# define _LIBCPP_LGAMMA_R_NOEXCEPT _NOEXCEPT +#else +# define _LIBCPP_LGAMMA_R_NOEXCEPT +#endif + +#if !defined(_LIBCPP_MSVCRT_LIKE) +extern "C" double lgamma_r(double, int*) _LIBCPP_LGAMMA_R_NOEXCEPT; #endif inline _LIBCPP_HIDE_FROM_ABI double __libcpp_lgamma(double __d) { -#if defined(_LIBCPP_MSVCRT_LIKE) || defined(__LLVM_LIBC__) +#if defined(_LIBCPP_MSVCRT_LIKE) return lgamma(__d); #else int __sign; diff --git a/lib/libcxx/include/__random/mersenne_twister_engine.h b/lib/libcxx/include/__random/mersenne_twister_engine.h index c60fe1529b..332e830e73 100644 --- a/lib/libcxx/include/__random/mersenne_twister_engine.h +++ b/lib/libcxx/include/__random/mersenne_twister_engine.h @@ -62,24 +62,6 @@ _LIBCPP_HIDE_FROM_ABI bool operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x, const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y); -template -_LIBCPP_HIDE_FROM_ABI bool -operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - template (__x_[__i - 1])) + __i) & _Max; + __i_ = 0; + } template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI void seed(_Sseq& __q) { __seed(__q, integral_constant()); } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()(); + _LIBCPP_HIDE_FROM_ABI result_type operator()() { + const size_t __j = (__i_ + 1) % __n; + const result_type __mask = __r == _Dt ? 
result_type(~0) : (result_type(1) << __r) - result_type(1); + const result_type __yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask); + const size_t __k = (__i_ + __m) % __n; + __x_[__i_] = __x_[__k] ^ __rshift<1>(__yp) ^ (__a * (__yp & 1)); + result_type __z = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d); + __i_ = __j; + __z ^= __lshift<__s>(__z) & __b; + __z ^= __lshift<__t>(__z) & __c; + return __z ^ __rshift<__l>(__z); + } + _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) operator()(); @@ -225,24 +224,6 @@ public: const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x, const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - template - friend bool operator!=( - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - template - _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant); + _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant) { + const unsigned __k = 1; + uint32_t __ar[__n * __k]; + __q.generate(__ar, __ar + __n * __k); + for (size_t __i = 0; __i < __n; ++__i) + __x_[__i] = static_cast(__ar[__i] & _Max); + const result_type __mask = __r == _Dt ? 
result_type(~0) : (result_type(1) << __r) - result_type(1); + __i_ = 0; + if ((__x_[0] & ~__mask) == 0) { + for (size_t __i = 1; __i < __n; ++__i) + if (__x_[__i] != 0) + return; + __x_[0] = result_type(1) << (__w - 1); + } + } + template - _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant); + _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant) { + const unsigned __k = 2; + uint32_t __ar[__n * __k]; + __q.generate(__ar, __ar + __n * __k); + for (size_t __i = 0; __i < __n; ++__i) + __x_[__i] = static_cast((__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max); + const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1); + __i_ = 0; + if ((__x_[0] & ~__mask) == 0) { + for (size_t __i = 1; __i < __n; ++__i) + if (__x_[__i] != 0) + return; + __x_[0] = result_type(1) << (__w - 1); + } + } template = 0> _LIBCPP_HIDE_FROM_ABI static result_type __lshift(result_type __x) { @@ -310,120 +320,6 @@ private: } }; -template -void mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::seed( - result_type __sd) _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { // __w >= 2 - __x_[0] = __sd & _Max; - for (size_t __i = 1; __i < __n; ++__i) - __x_[__i] = (__f * (__x_[__i - 1] ^ __rshift<__w - 2>(__x_[__i - 1])) + __i) & _Max; - __i_ = 0; -} - -template -template -void mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::__seed( - _Sseq& __q, integral_constant) { - const unsigned __k = 1; - uint32_t __ar[__n * __k]; - __q.generate(__ar, __ar + __n * __k); - for (size_t __i = 0; __i < __n; ++__i) - __x_[__i] = static_cast(__ar[__i] & _Max); - const result_type __mask = __r == _Dt ? 
result_type(~0) : (result_type(1) << __r) - result_type(1); - __i_ = 0; - if ((__x_[0] & ~__mask) == 0) { - for (size_t __i = 1; __i < __n; ++__i) - if (__x_[__i] != 0) - return; - __x_[0] = result_type(1) << (__w - 1); - } -} - -template -template -void mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::__seed( - _Sseq& __q, integral_constant) { - const unsigned __k = 2; - uint32_t __ar[__n * __k]; - __q.generate(__ar, __ar + __n * __k); - for (size_t __i = 0; __i < __n; ++__i) - __x_[__i] = static_cast((__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max); - const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1); - __i_ = 0; - if ((__x_[0] & ~__mask) == 0) { - for (size_t __i = 1; __i < __n; ++__i) - if (__x_[__i] != 0) - return; - __x_[0] = result_type(1) << (__w - 1); - } -} - -template -_UIntType -mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::operator()() { - const size_t __j = (__i_ + 1) % __n; - const result_type __mask = __r == _Dt ? 
result_type(~0) : (result_type(1) << __r) - result_type(1); - const result_type __yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask); - const size_t __k = (__i_ + __m) % __n; - __x_[__i_] = __x_[__k] ^ __rshift<1>(__yp) ^ (__a * (__yp & 1)); - result_type __z = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d); - __i_ = __j; - __z ^= __lshift<__s>(__z) & __b; - __z ^= __lshift<__t>(__z) & __c; - return __z ^ __rshift<__l>(__z); -} - template #include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> +#include <__iterator/back_insert_iterator.h> #include <__random/is_valid.h> #include <__random/uniform_real_distribution.h> #include <__vector/vector.h> @@ -190,8 +192,7 @@ piecewise_constant_distribution<_RealType>::param_type::param_type( __areas_.assign(1, 0.0); } else { __densities_.reserve(__b_.size() - 1); - for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__f_w) - __densities_.push_back(*__f_w); + std::copy_n(__f_w, __b_.size() - 1, std::back_inserter(__densities_)); __init(); } } diff --git a/lib/libcxx/include/__random/piecewise_linear_distribution.h b/lib/libcxx/include/__random/piecewise_linear_distribution.h index a9906430c0..8aa3f19ca9 100644 --- a/lib/libcxx/include/__random/piecewise_linear_distribution.h +++ b/lib/libcxx/include/__random/piecewise_linear_distribution.h @@ -9,9 +9,11 @@ #ifndef _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H #define _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H +#include <__algorithm/copy_n.h> #include <__algorithm/upper_bound.h> #include <__config> #include <__cstddef/ptrdiff_t.h> +#include <__iterator/back_insert_iterator.h> #include <__random/is_valid.h> #include <__random/uniform_real_distribution.h> #include <__vector/comparison.h> @@ -194,8 +196,7 @@ piecewise_linear_distribution<_RealType>::param_type::param_type( __areas_.assign(1, 0.0); } else { __densities_.reserve(__b_.size()); - for (size_t __i = 0; __i < __b_.size(); ++__i, ++__f_w) - __densities_.push_back(*__f_w); + 
std::copy_n(__f_w, __b_.size(), std::back_inserter(__densities_)); __init(); } } diff --git a/lib/libcxx/include/__ranges/adjacent_transform_view.h b/lib/libcxx/include/__ranges/adjacent_transform_view.h new file mode 100644 index 0000000000..11b1176824 --- /dev/null +++ b/lib/libcxx/include/__ranges/adjacent_transform_view.h @@ -0,0 +1,406 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANGES_ADJACENT_TRANSFORM_VIEW_H +#define _LIBCPP___RANGES_ADJACENT_TRANSFORM_VIEW_H + +#include <__config> + +#include <__algorithm/min.h> +#include <__compare/three_way_comparable.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/invocable.h> +#include <__cstddef/size_t.h> +#include <__functional/bind_back.h> +#include <__functional/invoke.h> +#include <__functional/operations.h> +#include <__iterator/concepts.h> +#include <__iterator/incrementable_traits.h> +#include <__iterator/iter_move.h> +#include <__iterator/iter_swap.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/prev.h> +#include <__memory/addressof.h> +#include <__ranges/access.h> +#include <__ranges/adjacent_view.h> +#include <__ranges/all.h> +#include <__ranges/concepts.h> +#include <__ranges/empty_view.h> +#include <__ranges/movable_box.h> +#include <__ranges/range_adaptor.h> +#include <__ranges/size.h> +#include <__ranges/view_interface.h> +#include <__ranges/zip_transform_view.h> +#include <__type_traits/common_type.h> +#include <__type_traits/decay.h> +#include 
<__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_object.h> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/make_unsigned.h> +#include <__type_traits/maybe_const.h> +#include <__utility/declval.h> +#include <__utility/forward.h> +#include <__utility/in_place.h> +#include <__utility/integer_sequence.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +namespace ranges { + +template +struct __apply_n { + template + static auto __apply(index_sequence<_Is...>) -> invoke_result_t<_Fn, decltype((void)_Is, std::declval<_Tp>())...>; + + template + static auto operator()(_Tp&&) -> decltype(__apply<_Tp>(make_index_sequence<_Np>{})); +}; + +template + requires view<_View> && (_Np > 0) && is_object_v<_Fn> && + regular_invocable<__apply_n<_Fn&, _Np>, range_reference_t<_View>> && + __referenceable, range_reference_t<_View>>> +class adjacent_transform_view : public view_interface> { +private: + _LIBCPP_NO_UNIQUE_ADDRESS adjacent_view<_View, _Np> __inner_; + _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Fn> __fun_; + + using _InnerView _LIBCPP_NODEBUG = adjacent_view<_View, _Np>; + + template + using __inner_iterator _LIBCPP_NODEBUG = iterator_t<__maybe_const<_Const, _InnerView>>; + + template + using __inner_sentinel _LIBCPP_NODEBUG = sentinel_t<__maybe_const<_Const, _InnerView>>; + + template + class __iterator; + + template + class __sentinel; + +public: + _LIBCPP_HIDE_FROM_ABI adjacent_transform_view() = default; + + _LIBCPP_HIDE_FROM_ABI constexpr explicit adjacent_transform_view(_View __base, _Fn __fun) + : __inner_(std::move(__base)), __fun_(std::in_place, std::move(__fun)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + requires copy_constructible<_View> + { + return __inner_.base(); + } + _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return 
std::move(__inner_).base(); } + + _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { return __iterator(*this, __inner_.begin()); } + + _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + requires range && regular_invocable<__apply_n, range_reference_t> + { + return __iterator(*this, __inner_.begin()); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto end() { + if constexpr (common_range<_InnerView>) { + return __iterator(*this, __inner_.end()); + } else { + return __sentinel(__inner_.end()); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + requires range && regular_invocable<__apply_n, range_reference_t> + { + if constexpr (common_range) { + return __iterator(*this, __inner_.end()); + } else { + return __sentinel(__inner_.end()); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto size() + requires sized_range<_InnerView> + { + return __inner_.size(); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + requires sized_range + { + return __inner_.size(); + } +}; + +template + requires view<_View> && (_Np > 0) && is_object_v<_Fn> && + regular_invocable<__apply_n<_Fn&, _Np>, range_reference_t<_View>> && + __referenceable, range_reference_t<_View>>> +template +class adjacent_transform_view<_View, _Fn, _Np>::__iterator { + friend adjacent_transform_view; + + using _Parent _LIBCPP_NODEBUG = __maybe_const<_Const, adjacent_transform_view>; + using _Base _LIBCPP_NODEBUG = __maybe_const<_Const, _View>; + + _Parent* __parent_ = nullptr; + __inner_iterator<_Const> __inner_; + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(_Parent& __parent, __inner_iterator<_Const> __inner) + : __parent_(std::addressof(__parent)), __inner_(std::move(__inner)) {} + + static consteval auto __get_iterator_category() { + using _Cat = iterator_traits>::iterator_category; + if constexpr (!is_reference_v< + invoke_result_t<__apply_n<__maybe_const<_Const, _Fn>&, _Np>, range_reference_t<_Base>>>) + return input_iterator_tag{}; + else if constexpr (derived_from<_Cat, random_access_iterator_tag>) 
+ return random_access_iterator_tag{}; + else if constexpr (derived_from<_Cat, bidirectional_iterator_tag>) + return bidirectional_iterator_tag{}; + else if constexpr (derived_from<_Cat, forward_iterator_tag>) + return forward_iterator_tag{}; + else + return input_iterator_tag{}; + } + + template + static consteval bool __noexcept_dereference(index_sequence<_Is...>) { + return noexcept(std::invoke( + std::declval<__maybe_const<_Const, _Fn>&>(), ((void)_Is, *std::declval const&>())...)); + } + +public: + using iterator_category = decltype(__get_iterator_category()); + using iterator_concept = typename __inner_iterator<_Const>::iterator_concept; + using value_type = + remove_cvref_t&, _Np>, range_reference_t<_Base>>>; + using difference_type = range_difference_t<_Base>; + + _LIBCPP_HIDE_FROM_ABI __iterator() = default; + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__iterator __i) + requires _Const && convertible_to<__inner_iterator, __inner_iterator> + : __parent_(__i.__parent_), __inner_(std::move(__i.__inner_)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const + noexcept(__noexcept_dereference(make_index_sequence<_Np>{})) { + return std::apply( + [&](const auto&... 
__iters) -> decltype(auto) { return std::invoke(*__parent_->__fun_, *__iters...); }, + __adjacent_view_iter_access::__get_current(__inner_)); + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { + ++__inner_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int) { + auto __tmp = *this; + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--() + requires bidirectional_range<_Base> + { + --__inner_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int) + requires bidirectional_range<_Base> + { + auto __tmp = *this; + --*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __x) + requires random_access_range<_Base> + { + __inner_ += __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __x) + requires random_access_range<_Base> + { + __inner_ -= __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const + requires random_access_range<_Base> + { + return std::apply( + [&](const auto&... 
__iters) -> decltype(auto) { return std::invoke(*__parent_->__fun_, __iters[__n]...); }, + __adjacent_view_iter_access::__get_current(__inner_)); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) { + return __x.__inner_ == __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __x.__inner_ < __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __x.__inner_ > __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __x.__inner_ <= __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __x.__inner_ >= __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> && three_way_comparable<__inner_iterator<_Const>> + { + return __x.__inner_ <=> __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(const __iterator& __i, difference_type __n) + requires random_access_range<_Base> + { + return __iterator(*__i.__parent_, __i.__inner_ + __n); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, const __iterator& __i) + requires random_access_range<_Base> + { + return __iterator(*__i.__parent_, __i.__inner_ + __n); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(const __iterator& __i, difference_type __n) + requires random_access_range<_Base> + { + return __iterator(*__i.__parent_, __i.__inner_ - __n); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type 
operator-(const __iterator& __x, const __iterator& __y) + requires sized_sentinel_for<__inner_iterator<_Const>, __inner_iterator<_Const>> + { + return __x.__inner_ - __y.__inner_; + } +}; + +template + requires view<_View> && (_Np > 0) && is_object_v<_Fn> && + regular_invocable<__apply_n<_Fn&, _Np>, range_reference_t<_View>> && + __referenceable, range_reference_t<_View>>> +template +class adjacent_transform_view<_View, _Fn, _Np>::__sentinel { + friend adjacent_transform_view; + + __inner_sentinel<_Const> __inner_; + + _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(__inner_sentinel<_Const> __inner) + : __inner_(std::move(__inner)) {} + +public: + _LIBCPP_HIDE_FROM_ABI __sentinel() = default; + + _LIBCPP_HIDE_FROM_ABI constexpr __sentinel(__sentinel __i) + requires _Const && convertible_to<__inner_sentinel, __inner_sentinel<_Const>> + : __inner_(std::move(__i.__inner_)) {} + + template + requires sentinel_for<__inner_sentinel<_Const>, __inner_iterator<_OtherConst>> + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) { + return __x.__inner_ == __y.__inner_; + } + + template + requires sized_sentinel_for<__inner_sentinel<_Const>, __inner_iterator<_OtherConst>> + _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>> + operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) { + return __x.__inner_ - __y.__inner_; + } + + template + requires sized_sentinel_for<__inner_sentinel<_Const>, __inner_iterator<_OtherConst>> + _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>> + operator-(const __sentinel& __x, const __iterator<_OtherConst>& __y) { + return __x.__inner_ - __y.__inner_; + } +}; + +namespace views { +namespace __adjacent_transform { + +template +struct __fn : __range_adaptor_closure<__fn<_Np>> { + template + requires(_Np == 0 && forward_range<_Range &&>) + _LIBCPP_HIDE_FROM_ABI static constexpr 
auto + operator()(_Range&&, _Fn&& __fn) noexcept(noexcept(views::zip_transform(std::forward<_Fn>(__fn)))) + -> decltype(views::zip_transform(std::forward<_Fn>(__fn))) { + return views::zip_transform(std::forward<_Fn>(__fn)); + } + + template + _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Range&& __range, _Fn&& __fn) noexcept( + noexcept(adjacent_transform_view, decay_t<_Fn>, _Np>( + std::forward<_Range>(__range), std::forward<_Fn>(__fn)))) + -> decltype(adjacent_transform_view, decay_t<_Fn>, _Np>( + std::forward<_Range>(__range), std::forward<_Fn>(__fn))) { + return adjacent_transform_view, decay_t<_Fn>, _Np>( + std::forward<_Range>(__range), std::forward<_Fn>(__fn)); + } + + template + requires constructible_from, _Fn> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Fn&& __f) const + noexcept(is_nothrow_constructible_v, _Fn>) { + return __pipeable(std::__bind_back(*this, std::forward<_Fn>(__f))); + } +}; + +} // namespace __adjacent_transform +inline namespace __cpo { +template +inline constexpr auto adjacent_transform = __adjacent_transform::__fn<_Np>{}; +inline constexpr auto pairwise_transform = adjacent_transform<2>; +} // namespace __cpo +} // namespace views +} // namespace ranges + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANGES_ADJACENT_TRANSFORM_VIEW_H diff --git a/lib/libcxx/include/__ranges/adjacent_view.h b/lib/libcxx/include/__ranges/adjacent_view.h new file mode 100644 index 0000000000..40474b85c7 --- /dev/null +++ b/lib/libcxx/include/__ranges/adjacent_view.h @@ -0,0 +1,419 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANGES_ADJACENT_VIEW_H +#define _LIBCPP___RANGES_ADJACENT_VIEW_H + +#include <__config> + +#include <__algorithm/min.h> +#include <__compare/three_way_comparable.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/equality_comparable.h> +#include <__cstddef/size_t.h> +#include <__functional/invoke.h> +#include <__functional/operations.h> +#include <__iterator/concepts.h> +#include <__iterator/incrementable_traits.h> +#include <__iterator/iter_move.h> +#include <__iterator/iter_swap.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/prev.h> +#include <__ranges/access.h> +#include <__ranges/all.h> +#include <__ranges/concepts.h> +#include <__ranges/empty_view.h> +#include <__ranges/enable_borrowed_range.h> +#include <__ranges/range_adaptor.h> +#include <__ranges/size.h> +#include <__ranges/view_interface.h> +#include <__tuple/tuple_transform.h> +#include <__type_traits/common_type.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/make_unsigned.h> +#include <__type_traits/maybe_const.h> +#include <__utility/declval.h> +#include <__utility/forward.h> +#include <__utility/integer_sequence.h> +#include <__utility/move.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +namespace ranges { + +template + requires view<_View> && (_Np > 0) +class adjacent_view : public view_interface> { +private: + _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); + + template + class __iterator; + + template + class __sentinel; + + struct __as_sentinel {}; + +public: + _LIBCPP_HIDE_FROM_ABI adjacent_view() + requires 
default_initializable<_View> + = default; + + _LIBCPP_HIDE_FROM_ABI constexpr explicit adjacent_view(_View __base) : __base_(std::move(__base)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + requires copy_constructible<_View> + { + return __base_; + } + _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + + _LIBCPP_HIDE_FROM_ABI constexpr auto begin() + requires(!__simple_view<_View>) + { + return __iterator(ranges::begin(__base_), ranges::end(__base_)); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + requires range // LWG4482 This is under-constrained. + { + return __iterator(ranges::begin(__base_), ranges::end(__base_)); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto end() + requires(!__simple_view<_View>) + { + if constexpr (common_range<_View>) { + return __iterator(__as_sentinel{}, ranges::begin(__base_), ranges::end(__base_)); + } else { + return __sentinel(ranges::end(__base_)); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + requires range // LWG4482 This is under-constrained. 
+ { + if constexpr (common_range) { + return __iterator(__as_sentinel{}, ranges::begin(__base_), ranges::end(__base_)); + } else { + return __sentinel(ranges::end(__base_)); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto size() + requires sized_range<_View> + { + using _ST = decltype(ranges::size(__base_)); + using _CT = common_type_t<_ST, size_t>; + auto __sz = static_cast<_CT>(ranges::size(__base_)); + __sz -= std::min<_CT>(__sz, _Np - 1); + return static_cast<_ST>(__sz); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + requires sized_range + { + using _ST = decltype(ranges::size(__base_)); + using _CT = common_type_t<_ST, size_t>; + auto __sz = static_cast<_CT>(ranges::size(__base_)); + __sz -= std::min<_CT>(__sz, _Np - 1); + return static_cast<_ST>(__sz); + } +}; + +struct __adjacent_view_iter_access { + template + _LIBCPP_HIDE_FROM_ABI constexpr static auto& __get_current(_Iter& __it) noexcept { + return __it.__current_; + } +}; + +template + requires view<_View> && (_Np > 0) +template +class adjacent_view<_View, _Np>::__iterator { + friend __adjacent_view_iter_access; + friend adjacent_view; + using _Base _LIBCPP_NODEBUG = __maybe_const<_Const, _View>; + array, _Np> __current_ = array, _Np>(); + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(iterator_t<_Base> __first, sentinel_t<_Base> __last) { + __current_[0] = __first; + for (size_t __i = 1; __i < _Np; ++__i) { + __current_[__i] = ranges::next(__current_[__i - 1], 1, __last); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__as_sentinel, iterator_t<_Base> __first, iterator_t<_Base> __last) { + if constexpr (!bidirectional_range<_Base>) { + __current_.fill(__last); + } else { + __current_[_Np - 1] = __last; + for (int __i = static_cast(_Np) - 2; __i >= 0; --__i) { + __current_[__i] = ranges::prev(__current_[__i + 1], 1, __first); + } + } + } + + template + _LIBCPP_HIDE_FROM_ABI explicit constexpr __iterator(_Iter&& __i, index_sequence<_Is...>) + : 
__current_{std::move(__i.__current_[_Is])...} {} + + static consteval auto __get_iterator_concept() { + if constexpr (random_access_range<_Base>) + return random_access_iterator_tag{}; + else if constexpr (bidirectional_range<_Base>) + return bidirectional_iterator_tag{}; + else + return forward_iterator_tag{}; + } + + template + using __always _LIBCPP_NODEBUG = _Tp; + + template + static auto __repeat_tuple_helper(index_sequence<_Is...>) -> tuple<__always<_Tp, _Is>...>; + +public: + using iterator_category = input_iterator_tag; + using iterator_concept = decltype(__get_iterator_concept()); + using value_type = decltype(__repeat_tuple_helper>(make_index_sequence<_Np>{})); + using difference_type = range_difference_t<_Base>; + + _LIBCPP_HIDE_FROM_ABI __iterator() = default; + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__iterator __i) + requires _Const && convertible_to, iterator_t> + : __iterator(std::move(__i), make_index_sequence<_Np>{}) {} + + _LIBCPP_HIDE_FROM_ABI constexpr auto operator*() const { + return std::__tuple_transform([](auto& __i) -> decltype(auto) { return *__i; }, __current_); + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { + for (auto& __i : __current_) { + ++__i; + } + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int) { + auto __tmp = *this; + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--() + requires bidirectional_range<_Base> + { + for (auto& __i : __current_) { + --__i; + } + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int) + requires bidirectional_range<_Base> + { + auto __tmp = *this; + --*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __x) + requires random_access_range<_Base> + { + for (auto& __i : __current_) { + __i += __x; + } + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __x) + requires 
random_access_range<_Base> + { + for (auto& __i : __current_) { + __i -= __x; + } + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto operator[](difference_type __n) const + requires random_access_range<_Base> + { + return std::__tuple_transform([&](auto& __i) -> decltype(auto) { return __i[__n]; }, __current_); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) { + return __x.__current_.back() == __y.__current_.back(); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __x.__current_.back() < __y.__current_.back(); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __y < __x; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return !(__y < __x); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return !(__x < __y); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> && three_way_comparable> + { + return __x.__current_.back() <=> __y.__current_.back(); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(const __iterator& __i, difference_type __n) + requires random_access_range<_Base> + { + auto __r = __i; + __r += __n; + return __r; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, const __iterator& __i) + requires random_access_range<_Base> + { + return __i + __n; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(const __iterator& __i, difference_type __n) + requires random_access_range<_Base> + { + auto __r = __i; + 
__r -= __n; + return __r; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) + requires sized_sentinel_for, iterator_t<_Base>> + { + return __x.__current_.back() - __y.__current_.back(); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr auto iter_move(const __iterator& __i) noexcept( + noexcept(ranges::iter_move(std::declval&>())) && + is_nothrow_move_constructible_v>) { + return std::__tuple_transform(ranges::iter_move, __i.__current_); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr void iter_swap(const __iterator& __l, const __iterator& __r) noexcept( + noexcept(ranges::iter_swap(std::declval>(), std::declval>()))) + requires indirectly_swappable> + { + for (size_t __i = 0; __i < _Np; ++__i) { + ranges::iter_swap(__l.__current_[__i], __r.__current_[__i]); + } + } +}; + +template + requires view<_View> && (_Np > 0) +template +class adjacent_view<_View, _Np>::__sentinel { + friend adjacent_view; + using _Base _LIBCPP_NODEBUG = __maybe_const<_Const, _View>; + sentinel_t<_Base> __end_ = sentinel_t<_Base>(); + + _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(sentinel_t<_Base> __end) { __end_ = std::move(__end); } + +public: + _LIBCPP_HIDE_FROM_ABI __sentinel() = default; + + _LIBCPP_HIDE_FROM_ABI constexpr __sentinel(__sentinel __i) + requires _Const && convertible_to, sentinel_t<_Base>> + : __end_(std::move(__i.__end_)) {} + + template + requires sentinel_for, iterator_t<__maybe_const<_OtherConst, _View>>> + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) { + return __x.__current_.back() == __y.__end_; + } + + template + requires sized_sentinel_for, iterator_t<__maybe_const<_OtherConst, _View>>> + _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>> + operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) { + return __x.__current_.back() - __y.__end_; + } + + template + requires 
sized_sentinel_for, iterator_t<__maybe_const<_OtherConst, _View>>> + _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>> + operator-(const __sentinel& __y, const __iterator<_OtherConst>& __x) { + return __y.__end_ - __x.__current_.back(); + } +}; + +template +constexpr bool enable_borrowed_range> = enable_borrowed_range<_View>; + +namespace views { +namespace __adjacent { + +template +struct __fn : __range_adaptor_closure<__fn<_Np>> { + template + requires(_Np == 0 && forward_range<_Range &&>) + _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Range&&) noexcept { + return empty_view>{}; + } + + template + _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Ranges&& __range) noexcept( + noexcept(adjacent_view, _Np>(std::forward<_Ranges>(__range)))) + -> decltype(adjacent_view, _Np>(std::forward<_Ranges>(__range))) { + return adjacent_view, _Np>(std::forward<_Ranges>(__range)); + } +}; + +} // namespace __adjacent +inline namespace __cpo { +template +inline constexpr auto adjacent = __adjacent::__fn<_Np>{}; +inline constexpr auto pairwise = adjacent<2>; +} // namespace __cpo +} // namespace views +} // namespace ranges + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANGES_ADJACENT_VIEW_H diff --git a/lib/libcxx/include/__ranges/as_rvalue_view.h b/lib/libcxx/include/__ranges/as_rvalue_view.h index 5849a6c368..a553f39998 100644 --- a/lib/libcxx/include/__ranges/as_rvalue_view.h +++ b/lib/libcxx/include/__ranges/as_rvalue_view.h @@ -48,27 +48,27 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr explicit as_rvalue_view(_View __base) : __base_(std::move(__base)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr 
_View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() requires(!__simple_view<_View>) { return move_iterator(ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires range { return move_iterator(ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() requires(!__simple_view<_View>) { if constexpr (common_range<_View>) { @@ -78,7 +78,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const requires range { if constexpr (common_range) { @@ -88,13 +88,13 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_View> { return ranges::size(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range { return ranges::size(__base_); @@ -117,7 +117,7 @@ struct __fn : __range_adaptor_closure<__fn> { return /*---------------------------------*/ as_rvalue_view(std::forward<_Range>(__range)); } - template + template requires same_as, range_reference_t<_Range>> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Range&& __range) noexcept(noexcept(views::all(std::forward<_Range>(__range)))) diff --git a/lib/libcxx/include/__ranges/chunk_by_view.h b/lib/libcxx/include/__ranges/chunk_by_view.h index 71fee3a4f2..8007f76f0c 100644 --- a/lib/libcxx/include/__ranges/chunk_by_view.h +++ b/lib/libcxx/include/__ranges/chunk_by_view.h @@ -100,17 +100,17 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr explicit chunk_by_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _View 
base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { // Note: this duplicates a check in `optional` but provides a better error message. _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "Trying to call begin() on a chunk_by_view that does not have a valid predicate."); @@ -122,7 +122,7 @@ public: return {*this, std::move(__first), *__cached_begin_}; } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() { if constexpr (common_range<_View>) { return __iterator{*this, ranges::end(__base_), ranges::end(__base_)}; } else { @@ -155,7 +155,7 @@ public: _LIBCPP_HIDE_FROM_ABI __iterator() = default; - _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { // If the iterator is at end, this would return an empty range which can be checked by the calling code and doesn't // necessarily lead to a bad access. 
_LIBCPP_ASSERT_PEDANTIC(__current_ != __next_, "Trying to dereference past-the-end chunk_by_view iterator."); diff --git a/lib/libcxx/include/__ranges/common_view.h b/lib/libcxx/include/__ranges/common_view.h index 133236dd1d..eec1045c8a 100644 --- a/lib/libcxx/include/__ranges/common_view.h +++ b/lib/libcxx/include/__ranges/common_view.h @@ -56,16 +56,16 @@ public: return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { if constexpr (random_access_range<_View> && sized_range<_View>) return ranges::begin(__base_); else return common_iterator, sentinel_t<_View>>(ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires range { if constexpr (random_access_range && sized_range) @@ -74,14 +74,14 @@ public: return common_iterator, sentinel_t>(ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() { if constexpr (random_access_range<_View> && sized_range<_View>) return ranges::begin(__base_) + ranges::size(__base_); else return common_iterator, sentinel_t<_View>>(ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const requires range { if constexpr (random_access_range && sized_range) @@ -90,13 +90,13 @@ public: return common_iterator, sentinel_t>(ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_View> { return ranges::size(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires 
sized_range { return ranges::size(__base_); diff --git a/lib/libcxx/include/__ranges/drop_view.h b/lib/libcxx/include/__ranges/drop_view.h index 42ada9299a..feb3705d2d 100644 --- a/lib/libcxx/include/__ranges/drop_view.h +++ b/lib/libcxx/include/__ranges/drop_view.h @@ -80,14 +80,14 @@ public: _LIBCPP_ASSERT_UNCATEGORIZED(__count_ >= 0, "count must be greater than or equal to zero."); } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() requires(!(__simple_view<_View> && random_access_range && sized_range)) { if constexpr (random_access_range<_View> && sized_range<_View>) { @@ -104,20 +104,20 @@ public: return __tmp; } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires random_access_range && sized_range { const auto __dist = std::min(ranges::distance(__base_), __count_); return ranges::begin(__base_) + __dist; } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() requires(!__simple_view<_View>) { return ranges::end(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const requires range { return ranges::end(__base_); @@ -129,13 +129,13 @@ public: return __s < __c ? 
0 : __s - __c; } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_View> { return __size(*this); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range { return __size(*this); diff --git a/lib/libcxx/include/__ranges/drop_while_view.h b/lib/libcxx/include/__ranges/drop_while_view.h index bc7f019393..1fe4e17f80 100644 --- a/lib/libcxx/include/__ranges/drop_while_view.h +++ b/lib/libcxx/include/__ranges/drop_while_view.h @@ -57,17 +57,17 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { // Note: this duplicates a check in `optional` but provides a better error message. 
_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), @@ -83,7 +83,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() { return ranges::end(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() { return ranges::end(__base_); } private: _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); diff --git a/lib/libcxx/include/__ranges/elements_of.h b/lib/libcxx/include/__ranges/elements_of.h new file mode 100644 index 0000000000..3f89f49d18 --- /dev/null +++ b/lib/libcxx/include/__ranges/elements_of.h @@ -0,0 +1,49 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANGES_ELEMENTS_OF_H +#define _LIBCPP___RANGES_ELEMENTS_OF_H + +#include <__config> +#include <__cstddef/byte.h> +#include <__memory/allocator.h> +#include <__ranges/concepts.h> +#include <__utility/forward.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +namespace ranges { + +template > +struct elements_of { + _LIBCPP_NO_UNIQUE_ADDRESS _Range range; + _LIBCPP_NO_UNIQUE_ADDRESS _Allocator allocator = _Allocator(); +}; + +template > +elements_of(_Range&&, _Allocator = _Allocator()) -> elements_of<_Range&&, _Allocator>; + +} // namespace ranges + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANGES_ELEMENTS_OF_H diff --git a/lib/libcxx/include/__ranges/empty_view.h b/lib/libcxx/include/__ranges/empty_view.h index fc08492110..54d62b3c77 100644 --- a/lib/libcxx/include/__ranges/empty_view.h +++ 
b/lib/libcxx/include/__ranges/empty_view.h @@ -29,11 +29,11 @@ template requires is_object_v<_Tp> class empty_view : public view_interface> { public: - _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* begin() noexcept { return nullptr; } - _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* end() noexcept { return nullptr; } - _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* data() noexcept { return nullptr; } - _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 0; } - _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return true; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* begin() noexcept { return nullptr; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* end() noexcept { return nullptr; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* data() noexcept { return nullptr; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 0; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return true; } }; template diff --git a/lib/libcxx/include/__ranges/filter_view.h b/lib/libcxx/include/__ranges/filter_view.h index 07980e7353..3ad69ea100 100644 --- a/lib/libcxx/include/__ranges/filter_view.h +++ b/lib/libcxx/include/__ranges/filter_view.h @@ -76,16 +76,16 @@ public: : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) {} template - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_Vp> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr _Pred const& pred() const { return *__pred_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Pred const& pred() const { return *__pred_; } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
constexpr __iterator begin() { // Note: this duplicates a check in `optional` but provides a better error message. _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "Trying to call begin() on a filter_view that does not have a valid predicate."); @@ -99,7 +99,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() { if constexpr (common_range<_View>) return __iterator{*this, ranges::end(__base_)}; else @@ -148,10 +148,10 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr __iterator(filter_view& __parent, iterator_t<_View> __current) : __current_(std::move(__current)), __parent_(std::addressof(__parent)) {} - _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> const& base() const& noexcept { return __current_; } - _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> base() && { return std::move(__current_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> const& base() const& noexcept { return __current_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> base() && { return std::move(__current_); } - _LIBCPP_HIDE_FROM_ABI constexpr range_reference_t<_View> operator*() const { return *__current_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr range_reference_t<_View> operator*() const { return *__current_; } _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> operator->() const requires __has_arrow> && copyable> { @@ -194,7 +194,7 @@ public: return __x.__current_ == __y.__current_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr range_rvalue_reference_t<_View> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr range_rvalue_reference_t<_View> iter_move(__iterator const& __it) noexcept(noexcept(ranges::iter_move(__it.__current_))) { return ranges::iter_move(__it.__current_); } @@ -218,7 +218,7 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(filter_view& __parent) : __end_(ranges::end(__parent.__base_)) {} - _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_View> 
base() const { return __end_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_View> base() const { return __end_; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(__iterator const& __x, __sentinel const& __y) { return __x.__current_ == __y.__end_; diff --git a/lib/libcxx/include/__ranges/iota_view.h b/lib/libcxx/include/__ranges/iota_view.h index 4b84585258..6b2576ec6b 100644 --- a/lib/libcxx/include/__ranges/iota_view.h +++ b/lib/libcxx/include/__ranges/iota_view.h @@ -30,6 +30,7 @@ #include <__ranges/movable_box.h> #include <__ranges/view_interface.h> #include <__type_traits/conditional.h> +#include <__type_traits/decay.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/make_unsigned.h> #include <__type_traits/type_identity.h> @@ -57,11 +58,17 @@ struct __get_wider_signed { return type_identity{}; else if constexpr (sizeof(_Int) < sizeof(long)) return type_identity{}; - else + else if constexpr (sizeof(_Int) < sizeof(long long)) return type_identity{}; - - static_assert( - sizeof(_Int) <= sizeof(long long), "Found integer-like type that is bigger than largest integer like type."); +# if _LIBCPP_HAS_INT128 + else if constexpr (sizeof(_Int) <= sizeof(__int128)) + return type_identity<__int128>{}; +# else + else if constexpr (sizeof(_Int) <= sizeof(long long)) + return type_identity{}; +# endif + else + static_assert(false, "Found integer-like type that is bigger than the largest integer like type."); } using type = typename decltype(__call())::type; @@ -125,7 +132,8 @@ class iota_view : public view_interface> { _LIBCPP_HIDE_FROM_ABI constexpr explicit __iterator(_Start __value) : __value_(std::move(__value)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _Start operator*() const noexcept(is_nothrow_copy_constructible_v<_Start>) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Start operator*() const + noexcept(is_nothrow_copy_constructible_v<_Start>) { return __value_; } @@ -189,7 +197,7 @@ class iota_view : public 
view_interface> { return *this; } - _LIBCPP_HIDE_FROM_ABI constexpr _Start operator[](difference_type __n) const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Start operator[](difference_type __n) const requires __advanceable<_Start> { return _Start(__value_ + __n); @@ -231,27 +239,28 @@ class iota_view : public view_interface> { return __x.__value_ <=> __y.__value_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) requires __advanceable<_Start> { __i += __n; return __i; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) requires __advanceable<_Start> { return __i + __n; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) requires __advanceable<_Start> { __i -= __n; return __i; } - _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type + operator-(const __iterator& __x, const __iterator& __y) requires __advanceable<_Start> { if constexpr (__integer_like<_Start>) { @@ -282,14 +291,14 @@ class iota_view : public view_interface> { return __x.__value_ == __y.__bound_sentinel_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start> operator-(const __iterator& __x, const __sentinel& __y) requires sized_sentinel_for<_BoundSentinel, _Start> { return __x.__value_ - __y.__bound_sentinel_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start> + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start> operator-(const __sentinel& __x, const __iterator& __y) requires sized_sentinel_for<_BoundSentinel, _Start> { @@ -329,24 +338,24 @@ public: requires(!same_as<_Start, _BoundSentinel> && !same_as<_BoundSentinel, unreachable_sentinel_t>) : iota_view(std::move(__first.__value_), std::move(__last.__bound_sentinel_)) {} - _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator{__value_}; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator{__value_}; } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const { if constexpr (same_as<_BoundSentinel, unreachable_sentinel_t>) return unreachable_sentinel; else return __sentinel{__bound_sentinel_}; } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const requires same_as<_Start, _BoundSentinel> { return __iterator{__bound_sentinel_}; } - _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const { return __value_ == __bound_sentinel_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const { return __value_ == __bound_sentinel_; } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires(same_as<_Start, _BoundSentinel> && __advanceable<_Start>) || (integral<_Start> && integral<_BoundSentinel>) || sized_sentinel_for<_BoundSentinel, _Start> { @@ -374,14 +383,15 @@ namespace views { namespace __iota { struct __fn { template - _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Start&& __start) const - noexcept(noexcept(ranges::iota_view(std::forward<_Start>(__start)))) - -> decltype(ranges::iota_view(std::forward<_Start>(__start))) { - return ranges::iota_view(std::forward<_Start>(__start)); + requires(requires(_Start __s) { ranges::iota_view>(std::forward<_Start>(__s)); }) + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Start&& __start) const + noexcept(noexcept(ranges::iota_view>(std::forward<_Start>(__start)))) { + return ranges::iota_view>(std::forward<_Start>(__start)); } template - _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Start&& __start, _BoundSentinel&& __bound_sentinel) const noexcept( + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto + operator()(_Start&& __start, _BoundSentinel&& __bound_sentinel) const noexcept( noexcept(ranges::iota_view(std::forward<_Start>(__start), std::forward<_BoundSentinel>(__bound_sentinel)))) -> decltype(ranges::iota_view(std::forward<_Start>(__start), std::forward<_BoundSentinel>(__bound_sentinel))) { return ranges::iota_view(std::forward<_Start>(__start), std::forward<_BoundSentinel>(__bound_sentinel)); @@ -392,6 +402,15 @@ struct __fn { inline namespace __cpo { inline constexpr auto iota = __iota::__fn{}; } // namespace __cpo + +# if _LIBCPP_STD_VER >= 26 + +inline constexpr auto indices = [] [[nodiscard]] (__integer_like auto __size) static { + return ranges::views::iota(decltype(__size){}, __size); +}; + +# endif + } // namespace views } // namespace ranges diff --git a/lib/libcxx/include/__ranges/owning_view.h b/lib/libcxx/include/__ranges/owning_view.h index 254bdb4329..1ab81afee7 100644 --- a/lib/libcxx/include/__ranges/owning_view.h +++ b/lib/libcxx/include/__ranges/owning_view.h @@ -49,52 +49,52 @@ public: _LIBCPP_HIDE_FROM_ABI owning_view(owning_view&&) = default; _LIBCPP_HIDE_FROM_ABI owning_view& operator=(owning_view&&) = default; - _LIBCPP_HIDE_FROM_ABI constexpr _Rp& base() & noexcept { return __r_; } - _LIBCPP_HIDE_FROM_ABI constexpr const _Rp& base() const& noexcept { return __r_; } - _LIBCPP_HIDE_FROM_ABI constexpr _Rp&& base() && noexcept { return std::move(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Rp&& base() const&& noexcept { return std::move(__r_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Rp& base() & noexcept { return __r_; } + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI constexpr const _Rp& base() const& noexcept { return __r_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Rp&& base() && noexcept { return std::move(__r_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Rp&& base() const&& noexcept { return std::move(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Rp> begin() { return ranges::begin(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Rp> end() { return ranges::end(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Rp> begin() { return ranges::begin(__r_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Rp> end() { return ranges::end(__r_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires range { return ranges::begin(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const requires range { return ranges::end(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr bool empty() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() requires requires { ranges::empty(__r_); } { return ranges::empty(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const requires requires { ranges::empty(__r_); } { return ranges::empty(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_Rp> { return ranges::size(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range { return ranges::size(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto data() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() requires contiguous_range<_Rp> { return ranges::data(__r_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto data() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() 
const requires contiguous_range { return ranges::data(__r_); diff --git a/lib/libcxx/include/__ranges/ref_view.h b/lib/libcxx/include/__ranges/ref_view.h index 5329d778dd..109a10cec2 100644 --- a/lib/libcxx/include/__ranges/ref_view.h +++ b/lib/libcxx/include/__ranges/ref_view.h @@ -51,24 +51,24 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr ref_view(_Tp&& __t) : __range_(std::addressof(static_cast<_Range&>(std::forward<_Tp>(__t)))) {} - _LIBCPP_HIDE_FROM_ABI constexpr _Range& base() const { return *__range_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Range& base() const { return *__range_; } - _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Range> begin() const { return ranges::begin(*__range_); } - _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Range> end() const { return ranges::end(*__range_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Range> begin() const { return ranges::begin(*__range_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Range> end() const { return ranges::end(*__range_); } - _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const requires requires { ranges::empty(*__range_); } { return ranges::empty(*__range_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range<_Range> { return ranges::size(*__range_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto data() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() const requires contiguous_range<_Range> { return ranges::data(*__range_); diff --git a/lib/libcxx/include/__ranges/repeat_view.h b/lib/libcxx/include/__ranges/repeat_view.h index 56b09701c8..9192183f48 100644 --- a/lib/libcxx/include/__ranges/repeat_view.h +++ b/lib/libcxx/include/__ranges/repeat_view.h @@ -108,17 +108,21 @@ public: __bound_ >= 0, "The behavior is undefined if Bound is not unreachable_sentinel_t and bound is negative"); } - 
_LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator(std::addressof(*__value_)); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { + return __iterator(std::addressof(*__value_)); + } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const requires(!same_as<_Bound, unreachable_sentinel_t>) { return __iterator(std::addressof(*__value_), __bound_); } - _LIBCPP_HIDE_FROM_ABI constexpr unreachable_sentinel_t end() const noexcept { return unreachable_sentinel; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr unreachable_sentinel_t end() const noexcept { + return unreachable_sentinel; + } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires(!same_as<_Bound, unreachable_sentinel_t>) { return std::__to_unsigned_like(__bound_); @@ -152,7 +156,7 @@ public: _LIBCPP_HIDE_FROM_ABI __iterator() = default; - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const noexcept { return *__value_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const noexcept { return *__value_; } _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { ++__current_; @@ -192,7 +196,9 @@ public: return *this; } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator[](difference_type __n) const noexcept { return *(*this + __n); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator[](difference_type __n) const noexcept { + return *(*this + __n); + } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) { return __x.__current_ == __y.__current_; @@ -202,22 +208,23 @@ public: return __x.__current_ <=> __y.__current_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, 
difference_type __n) { __i += __n; return __i; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) { __i += __n; return __i; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) { __i -= __n; return __i; } - _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type + operator-(const __iterator& __x, const __iterator& __y) { return static_cast(__x.__current_) - static_cast(__y.__current_); } diff --git a/lib/libcxx/include/__ranges/single_view.h b/lib/libcxx/include/__ranges/single_view.h index 955578b99c..213c507138 100644 --- a/lib/libcxx/include/__ranges/single_view.h +++ b/lib/libcxx/include/__ranges/single_view.h @@ -63,21 +63,21 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr explicit single_view(in_place_t, _Args&&... 
__args) : __value_{in_place, std::forward<_Args>(__args)...} {} - _LIBCPP_HIDE_FROM_ABI constexpr _Tp* begin() noexcept { return data(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* begin() noexcept { return data(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* begin() const noexcept { return data(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* begin() const noexcept { return data(); } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp* end() noexcept { return data() + 1; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* end() noexcept { return data() + 1; } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* end() const noexcept { return data() + 1; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* end() const noexcept { return data() + 1; } - _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return false; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return false; } - _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 1; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 1; } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp* data() noexcept { return __value_.operator->(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* data() noexcept { return __value_.operator->(); } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* data() const noexcept { return __value_.operator->(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* data() const noexcept { return __value_.operator->(); } }; template diff --git a/lib/libcxx/include/__ranges/take_view.h b/lib/libcxx/include/__ranges/take_view.h index 85723dc5e3..13cb4a285d 100644 --- a/lib/libcxx/include/__ranges/take_view.h +++ b/lib/libcxx/include/__ranges/take_view.h @@ -75,15 +75,15 @@ public: _LIBCPP_ASSERT_UNCATEGORIZED(__count >= 0, "count has to be greater than or equal to zero"); } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View 
base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() requires(!__simple_view<_View>) { if constexpr (sized_range<_View>) { @@ -99,7 +99,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires range { if constexpr (sized_range) { @@ -115,7 +115,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() requires(!__simple_view<_View>) { if constexpr (sized_range<_View>) { @@ -129,7 +129,7 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const requires range { if constexpr (sized_range) { @@ -143,14 +143,14 @@ public: } } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_View> { auto __n = ranges::size(__base_); return ranges::min(__n, static_cast(__count_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range { auto __n = ranges::size(__base_); @@ -178,7 +178,7 @@ public: requires _Const && convertible_to, sentinel_t<_Base>> : __end_(std::move(__s.__end_)) {} - _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Base> base() const { return __end_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Base> base() const { return __end_; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const _Iter<_Const>& __lhs, const __sentinel& __rhs) { return __lhs.count() == 0 || __lhs.base() == __rhs.__end_; diff --git a/lib/libcxx/include/__ranges/transform_view.h 
b/lib/libcxx/include/__ranges/transform_view.h index ae85dfa452..ab1adf9cdb 100644 --- a/lib/libcxx/include/__ranges/transform_view.h +++ b/lib/libcxx/include/__ranges/transform_view.h @@ -13,7 +13,6 @@ #include <__compare/three_way_comparable.h> #include <__concepts/constructible.h> #include <__concepts/convertible_to.h> -#include <__concepts/copyable.h> #include <__concepts/derived_from.h> #include <__concepts/equality_comparable.h> #include <__concepts/invocable.h> @@ -64,7 +63,7 @@ concept __regular_invocable_with_range_ref = regular_invocable<_Fn, range_refere template concept __transform_view_constraints = view<_View> && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_View>> && - __is_referenceable_v>>; + __referenceable>>; # if _LIBCPP_STD_VER >= 23 template diff --git a/lib/libcxx/include/__ranges/view_interface.h b/lib/libcxx/include/__ranges/view_interface.h index 3bcfbaf3a2..37b2c9e2c1 100644 --- a/lib/libcxx/include/__ranges/view_interface.h +++ b/lib/libcxx/include/__ranges/view_interface.h @@ -87,35 +87,35 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr auto data() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() requires contiguous_iterator> { return std::to_address(ranges::begin(__derived())); } template - _LIBCPP_HIDE_FROM_ABI constexpr auto data() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() const requires range && contiguous_iterator> { return std::to_address(ranges::begin(__derived())); } template - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires forward_range<_D2> && sized_sentinel_for, iterator_t<_D2>> { return std::__to_unsigned_like(ranges::end(__derived()) - ranges::begin(__derived())); } template - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires forward_range && sized_sentinel_for, iterator_t> { return 
std::__to_unsigned_like(ranges::end(__derived()) - ranges::begin(__derived())); } template - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front() requires forward_range<_D2> { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( @@ -124,7 +124,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front() const requires forward_range { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( @@ -133,7 +133,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back() requires bidirectional_range<_D2> && common_range<_D2> { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( @@ -142,7 +142,7 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back() const requires bidirectional_range && common_range { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( @@ -151,12 +151,12 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) { return ranges::begin(__derived())[__index]; } template - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) const { return ranges::begin(__derived())[__index]; } }; diff --git a/lib/libcxx/include/__ranges/zip_transform_view.h b/lib/libcxx/include/__ranges/zip_transform_view.h new file mode 100644 index 0000000000..07aa182f28 --- /dev/null +++ b/lib/libcxx/include/__ranges/zip_transform_view.h @@ -0,0 +1,357 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANGES_ZIP_TRANSFORM_VIEW_H +#define _LIBCPP___RANGES_ZIP_TRANSFORM_VIEW_H + +#include <__config> + +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/invocable.h> +#include <__functional/invoke.h> +#include <__iterator/concepts.h> +#include <__iterator/incrementable_traits.h> +#include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> +#include <__ranges/access.h> +#include <__ranges/all.h> +#include <__ranges/concepts.h> +#include <__ranges/empty_view.h> +#include <__ranges/movable_box.h> +#include <__ranges/view_interface.h> +#include <__ranges/zip_view.h> +#include <__type_traits/decay.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_object.h> +#include <__type_traits/is_reference.h> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/maybe_const.h> +#include <__type_traits/remove_cvref.h> +#include <__utility/forward.h> +#include <__utility/in_place.h> +#include <__utility/move.h> +#include // for std::apply + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +namespace ranges { + +template + requires(view<_Views> && ...) 
&& + (sizeof...(_Views) > 0) && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_Views>...> && + __referenceable...>> +class zip_transform_view : public view_interface> { + _LIBCPP_NO_UNIQUE_ADDRESS zip_view<_Views...> __zip_; + _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Fn> __fun_; + + using _InnerView _LIBCPP_NODEBUG = zip_view<_Views...>; + template + using __ziperator _LIBCPP_NODEBUG = iterator_t<__maybe_const<_Const, _InnerView>>; + template + using __zentinel _LIBCPP_NODEBUG = sentinel_t<__maybe_const<_Const, _InnerView>>; + + template + class __iterator; + + template + class __sentinel; + +public: + _LIBCPP_HIDE_FROM_ABI zip_transform_view() = default; + + _LIBCPP_HIDE_FROM_ABI constexpr explicit zip_transform_view(_Fn __fun, _Views... __views) + : __zip_(std::move(__views)...), __fun_(in_place, std::move(__fun)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { return __iterator(*this, __zip_.begin()); } + + _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + requires range && regular_invocable...> + { + return __iterator(*this, __zip_.begin()); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto end() { + if constexpr (common_range<_InnerView>) { + return __iterator(*this, __zip_.end()); + } else { + return __sentinel(__zip_.end()); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + requires range && regular_invocable...> + { + if constexpr (common_range) { + return __iterator(*this, __zip_.end()); + } else { + return __sentinel(__zip_.end()); + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto size() + requires sized_range<_InnerView> + { + return __zip_.size(); + } + + _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + requires sized_range + { + return __zip_.size(); + } +}; + +template +zip_transform_view(_Fn, _Ranges&&...) 
-> zip_transform_view<_Fn, views::all_t<_Ranges>...>; + +template +struct __zip_transform_iterator_category_base {}; + +template + requires forward_range<__maybe_const<_Const, zip_view<_Views...>>> +struct __zip_transform_iterator_category_base<_Const, _Fn, _Views...> { +private: + template + using __tag _LIBCPP_NODEBUG = typename iterator_traits>>::iterator_category; + + static consteval auto __get_iterator_category() { + if constexpr (!is_reference_v&, + range_reference_t<__maybe_const<_Const, _Views>>...>>) { + return input_iterator_tag(); + } else if constexpr ((derived_from<__tag<_Views>, random_access_iterator_tag> && ...)) { + return random_access_iterator_tag(); + } else if constexpr ((derived_from<__tag<_Views>, bidirectional_iterator_tag> && ...)) { + return bidirectional_iterator_tag(); + } else if constexpr ((derived_from<__tag<_Views>, forward_iterator_tag> && ...)) { + return forward_iterator_tag(); + } else { + return input_iterator_tag(); + } + } + +public: + using iterator_category = decltype(__get_iterator_category()); +}; + +template + requires(view<_Views> && ...) && + (sizeof...(_Views) > 0) && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_Views>...> && + __referenceable...>> +template +class zip_transform_view<_Fn, _Views...>::__iterator + : public __zip_transform_iterator_category_base<_Const, _Fn, _Views...> { + using _Parent _LIBCPP_NODEBUG = __maybe_const<_Const, zip_transform_view>; + using _Base _LIBCPP_NODEBUG = __maybe_const<_Const, _InnerView>; + + friend zip_transform_view<_Fn, _Views...>; + + _Parent* __parent_ = nullptr; + __ziperator<_Const> __inner_; + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(_Parent& __parent, __ziperator<_Const> __inner) + : __parent_(std::addressof(__parent)), __inner_(std::move(__inner)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr auto __get_deref_and_invoke() const noexcept { + return [&__fun = *__parent_->__fun_](const auto&... 
__iters) noexcept(noexcept(std::invoke( + *__parent_->__fun_, *__iters...))) -> decltype(auto) { return std::invoke(__fun, *__iters...); }; + } + +public: + using iterator_concept = typename __ziperator<_Const>::iterator_concept; + using value_type = + remove_cvref_t&, range_reference_t<__maybe_const<_Const, _Views>>...>>; + using difference_type = range_difference_t<_Base>; + + _LIBCPP_HIDE_FROM_ABI __iterator() = default; + _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__iterator __i) + requires _Const && convertible_to<__ziperator, __ziperator<_Const>> + : __parent_(__i.__parent_), __inner_(std::move(__i.__inner_)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const + noexcept(noexcept(std::apply(__get_deref_and_invoke(), __zip_view_iterator_access::__get_underlying(__inner_)))) { + return std::apply(__get_deref_and_invoke(), __zip_view_iterator_access::__get_underlying(__inner_)); + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { + ++__inner_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr void operator++(int) { ++*this; } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int) + requires forward_range<_Base> + { + auto __tmp = *this; + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--() + requires bidirectional_range<_Base> + { + --__inner_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int) + requires bidirectional_range<_Base> + { + auto __tmp = *this; + --*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __x) + requires random_access_range<_Base> + { + __inner_ += __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __x) + requires random_access_range<_Base> + { + __inner_ -= __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const + requires random_access_range<_Base> + { + return 
std::apply( + [&](const _Is&... __iters) -> decltype(auto) { + return std::invoke(*__parent_->__fun_, __iters[iter_difference_t<_Is>(__n)]...); + }, + __zip_view_iterator_access::__get_underlying(__inner_)); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) + requires equality_comparable<__ziperator<_Const>> + { + return __x.__inner_ == __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y) + requires random_access_range<_Base> + { + return __x.__inner_ <=> __y.__inner_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(const __iterator& __i, difference_type __n) + requires random_access_range<_Base> + { + return __iterator(*__i.__parent_, __i.__inner_ + __n); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, const __iterator& __i) + requires random_access_range<_Base> + { + return __iterator(*__i.__parent_, __i.__inner_ + __n); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(const __iterator& __i, difference_type __n) + requires random_access_range<_Base> + { + return __iterator(*__i.__parent_, __i.__inner_ - __n); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) + requires sized_sentinel_for<__ziperator<_Const>, __ziperator<_Const>> + { + return __x.__inner_ - __y.__inner_; + } +}; + +template + requires(view<_Views> && ...) 
&& + (sizeof...(_Views) > 0) && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_Views>...> && + __referenceable...>> +template +class zip_transform_view<_Fn, _Views...>::__sentinel { + __zentinel<_Const> __inner_; + + friend zip_transform_view<_Fn, _Views...>; + + _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(__zentinel<_Const> __inner) : __inner_(__inner) {} + +public: + _LIBCPP_HIDE_FROM_ABI __sentinel() = default; + + _LIBCPP_HIDE_FROM_ABI constexpr __sentinel(__sentinel __i) + requires _Const && convertible_to<__zentinel, __zentinel<_Const>> + : __inner_(__i.__inner_) {} + + template + requires sentinel_for<__zentinel<_Const>, __ziperator<_OtherConst>> + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) { + return __x.__inner_ == __y.__inner_; + } + + template + requires sized_sentinel_for<__zentinel<_Const>, __ziperator<_OtherConst>> + _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>> + operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) { + return __x.__inner_ - __y.__inner_; + } + + template + requires sized_sentinel_for<__zentinel<_Const>, __ziperator<_OtherConst>> + _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>> + operator-(const __sentinel& __x, const __iterator<_OtherConst>& __y) { + return __x.__inner_ - __y.__inner_; + } +}; + +namespace views { +namespace __zip_transform { + +struct __fn { + template + requires(move_constructible> && regular_invocable&> && + is_object_v&>>) + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Fn&&) const + noexcept(noexcept(auto(views::empty&>>>))) { + return views::empty&>>>; + } + + template + requires(sizeof...(_Ranges) > 0) + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Fn&& __fun, _Ranges&&... 
__rs) const + noexcept(noexcept(zip_transform_view(std::forward<_Fn>(__fun), std::forward<_Ranges>(__rs)...))) + -> decltype(zip_transform_view(std::forward<_Fn>(__fun), std::forward<_Ranges>(__rs)...)) { + return zip_transform_view(std::forward<_Fn>(__fun), std::forward<_Ranges>(__rs)...); + } +}; + +} // namespace __zip_transform +inline namespace __cpo { +inline constexpr auto zip_transform = __zip_transform::__fn{}; +} // namespace __cpo +} // namespace views +} // namespace ranges + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANGES_ZIP_TRANSFORM_VIEW_H diff --git a/lib/libcxx/include/__ranges/zip_view.h b/lib/libcxx/include/__ranges/zip_view.h index e2a194efcf..bea64c4997 100644 --- a/lib/libcxx/include/__ranges/zip_view.h +++ b/lib/libcxx/include/__ranges/zip_view.h @@ -31,6 +31,7 @@ #include <__ranges/enable_borrowed_range.h> #include <__ranges/size.h> #include <__ranges/view_interface.h> +#include <__tuple/tuple_transform.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/make_unsigned.h> #include <__utility/declval.h> @@ -58,15 +59,6 @@ concept __zip_is_common = (!(bidirectional_range<_Ranges> && ...) && (common_range<_Ranges> && ...)) || ((random_access_range<_Ranges> && ...) && (sized_range<_Ranges> && ...)); -template -_LIBCPP_HIDE_FROM_ABI constexpr auto __tuple_transform(_Fun&& __f, _Tuple&& __tuple) { - return std::apply( - [&](_Types&&... 
__elements) { - return tuple...>(std::invoke(__f, std::forward<_Types>(__elements))...); - }, - std::forward<_Tuple>(__tuple)); -} - template _LIBCPP_HIDE_FROM_ABI constexpr void __tuple_for_each(_Fun&& __f, _Tuple&& __tuple) { std::apply( @@ -145,24 +137,24 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr auto begin() requires(!(__simple_view<_Views> && ...)) { - return __iterator(ranges::__tuple_transform(ranges::begin, __views_)); + return __iterator(std::__tuple_transform(ranges::begin, __views_)); } _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires(range && ...) { - return __iterator(ranges::__tuple_transform(ranges::begin, __views_)); + return __iterator(std::__tuple_transform(ranges::begin, __views_)); } _LIBCPP_HIDE_FROM_ABI constexpr auto end() requires(!(__simple_view<_Views> && ...)) { if constexpr (!__zip_is_common<_Views...>) { - return __sentinel(ranges::__tuple_transform(ranges::end, __views_)); + return __sentinel(std::__tuple_transform(ranges::end, __views_)); } else if constexpr ((random_access_range<_Views> && ...)) { return begin() + iter_difference_t<__iterator>(size()); } else { - return __iterator(ranges::__tuple_transform(ranges::end, __views_)); + return __iterator(std::__tuple_transform(ranges::end, __views_)); } } @@ -170,11 +162,11 @@ public: requires(range && ...) 
{ if constexpr (!__zip_is_common) { - return __sentinel(ranges::__tuple_transform(ranges::end, __views_)); + return __sentinel(std::__tuple_transform(ranges::end, __views_)); } else if constexpr ((random_access_range && ...)) { return begin() + iter_difference_t<__iterator>(size()); } else { - return __iterator(ranges::__tuple_transform(ranges::end, __views_)); + return __iterator(std::__tuple_transform(ranges::end, __views_)); } } @@ -186,7 +178,7 @@ public: using _CT = make_unsigned_t>; return ranges::min({_CT(__sizes)...}); }, - ranges::__tuple_transform(ranges::size, __views_)); + std::__tuple_transform(ranges::size, __views_)); } _LIBCPP_HIDE_FROM_ABI constexpr auto size() const @@ -197,7 +189,7 @@ public: using _CT = make_unsigned_t>; return ranges::min({_CT(__sizes)...}); }, - ranges::__tuple_transform(ranges::size, __views_)); + std::__tuple_transform(ranges::size, __views_)); } }; @@ -235,6 +227,13 @@ struct __zip_view_iterator_category_base<_Const, _Views...> { using iterator_category = input_iterator_tag; }; +struct __zip_view_iterator_access { + template + _LIBCPP_HIDE_FROM_ABI static constexpr decltype(auto) __get_underlying(_Iter& __iter) noexcept { + return (__iter.__current_); + } +}; + template requires(view<_Views> && ...) 
&& (sizeof...(_Views) > 0) template @@ -255,6 +254,7 @@ class zip_view<_Views...>::__iterator : public __zip_view_iterator_category_base static constexpr bool __is_zip_view_iterator = true; friend struct __product_iterator_traits<__iterator>; + friend __zip_view_iterator_access; public: using iterator_concept = decltype(ranges::__get_zip_view_iterator_tag<_Const, _Views...>()); @@ -268,7 +268,7 @@ public: : __current_(std::move(__i.__current_)) {} _LIBCPP_HIDE_FROM_ABI constexpr auto operator*() const { - return ranges::__tuple_transform([](auto& __i) -> decltype(auto) { return *__i; }, __current_); + return std::__tuple_transform([](auto& __i) -> decltype(auto) { return *__i; }, __current_); } _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { @@ -318,7 +318,7 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr auto operator[](difference_type __n) const requires __zip_all_random_access<_Const, _Views...> { - return ranges::__tuple_transform( + return std::__tuple_transform( [&](_Iter& __i) -> decltype(auto) { return __i[iter_difference_t<_Iter>(__n)]; }, __current_); } @@ -377,7 +377,7 @@ public: _LIBCPP_HIDE_FROM_ABI friend constexpr auto iter_move(const __iterator& __i) noexcept( (noexcept(ranges::iter_move(std::declval>&>())) && ...) 
&& (is_nothrow_move_constructible_v>> && ...)) { - return ranges::__tuple_transform(ranges::iter_move, __i.__current_); + return std::__tuple_transform(ranges::iter_move, __i.__current_); } _LIBCPP_HIDE_FROM_ABI friend constexpr void iter_swap(const __iterator& __l, const __iterator& __r) noexcept( diff --git a/lib/libcxx/include/__split_buffer b/lib/libcxx/include/__split_buffer index 21e58f4abc..d6176f8ca2 100644 --- a/lib/libcxx/include/__split_buffer +++ b/lib/libcxx/include/__split_buffer @@ -13,10 +13,12 @@ #include <__algorithm/max.h> #include <__algorithm/move.h> #include <__algorithm/move_backward.h> +#include <__assert> #include <__config> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/move_iterator.h> +#include <__memory/addressof.h> #include <__memory/allocate_at_least.h> #include <__memory/allocator.h> #include <__memory/allocator_traits.h> @@ -28,11 +30,9 @@ #include <__type_traits/integral_constant.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_replaceable.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_destructible.h> #include <__type_traits/is_trivially_relocatable.h> -#include <__type_traits/remove_reference.h> #include <__utility/forward.h> #include <__utility/move.h> @@ -45,25 +45,430 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// __split_buffer allocates a contiguous chunk of memory and stores objects in the range [__begin_, __end_). -// It has uninitialized memory in the ranges [__first_, __begin_) and [__end_, __cap_). That allows -// it to grow both in the front and back without having to move the data. 
+template class _Layout> +class __split_buffer; + +template +class __split_buffer_pointer_layout { +protected: + using value_type = _Tp; + using allocator_type = _Allocator; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = typename __alloc_traits::size_type; + using difference_type = typename __alloc_traits::difference_type; + using pointer = typename __alloc_traits::pointer; + using const_pointer = typename __alloc_traits::const_pointer; + using iterator = pointer; + using const_iterator = const_pointer; + using __sentinel_type _LIBCPP_NODEBUG = pointer; -template > -struct __split_buffer { public: - using value_type = _Tp; - using allocator_type = _Allocator; - using __alloc_rr _LIBCPP_NODEBUG = __libcpp_remove_reference_t; - using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>; - using reference = value_type&; - using const_reference = const value_type&; - using size_type = typename __alloc_traits::size_type; - using difference_type = typename __alloc_traits::difference_type; - using pointer = typename __alloc_traits::pointer; - using const_pointer = typename __alloc_traits::const_pointer; - using iterator = pointer; - using const_iterator = const_pointer; + // Can't be defaulted due to _LIBCPP_COMPRESSED_PAIR not being an aggregate in C++03 and C++11. 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_pointer_layout() : __back_cap_(nullptr) {} + + _LIBCPP_CONSTEXPR_SINCE_CXX20 + _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_pointer_layout(const allocator_type& __alloc) + : __back_cap_(nullptr), __alloc_(__alloc) {} + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT { + return __front_cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __end_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __end_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { + return static_cast(__end_ - __begin_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __begin_ == __end_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { + return static_cast(__back_cap_ - __front_cap_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT { + return __alloc_; + } + + // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction, + // not explicit types. 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT { + return __end_; + } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT { + return __back_cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT { + __front_cap_ = __new_first; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT { + __begin_ = __new_begin; + __end_ = __new_end; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT { + __begin_ = __new_begin; + __end_ = __begin_ + __new_size; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT { + _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_"); + __end_ = __new_end; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT { + __end_ = __begin_ + __new_size; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT { + __back_cap_ = __front_cap_ + __new_capacity; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT { + __back_cap_ = __new_capacity; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT { + return static_cast(__begin_ - __front_cap_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT { + return static_cast(__back_cap_ - __end_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return *(__end_ - 1); } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { return *(__end_ - 1); } + 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator( + __split_buffer_pointer_layout<__split_buffer, + value_type, + allocator_type>& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__back_cap_, __other.__back_cap_); + std::swap(__end_, __other.__end_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_pointer_layout& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__back_cap_, __other.__back_cap_); + std::swap(__end_, __other.__end_); + std::__swap_allocator(__alloc_, __other.__alloc_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT { + __front_cap_ = nullptr; + __begin_ = nullptr; + __end_ = nullptr; + __back_cap_ = nullptr; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __copy_without_alloc(__split_buffer_pointer_layout const& __other) + _NOEXCEPT_(is_nothrow_copy_assignable::value) { + __front_cap_ = __other.__front_cap_; + __begin_ = __other.__begin_; + __end_ = __other.__end_; + __back_cap_ = __other.__back_cap_; + } + +private: + pointer __front_cap_ = nullptr; + pointer __begin_ = nullptr; + pointer __end_ = nullptr; + _LIBCPP_COMPRESSED_PAIR(pointer, __back_cap_, allocator_type, __alloc_); + + template + friend class __split_buffer_pointer_layout; +}; + +template +class __split_buffer_size_layout { +protected: + using value_type = _Tp; + using allocator_type = _Allocator; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = typename __alloc_traits::size_type; + using difference_type = typename __alloc_traits::difference_type; + using pointer = typename __alloc_traits::pointer; + using const_pointer = typename __alloc_traits::const_pointer; + using iterator = pointer; + using 
const_iterator = const_pointer; + using __sentinel_type _LIBCPP_NODEBUG = size_type; + +public: + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_size_layout() = default; + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_size_layout(const allocator_type& __alloc) + : __alloc_(__alloc) {} + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT { + return __front_cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __begin_ + __size_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __begin_ + __size_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __size_ == 0; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { return __cap_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT { + return __alloc_; + } + + // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction, + // not explicit types. 
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT { + return __size_; + } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT { + return __cap_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT { + __front_cap_ = __new_first; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT { + // Size-based __split_buffers track their size directly: we need to explicitly update the size + // when the front is adjusted. + __size_ -= __new_begin - __begin_; + __begin_ = __new_begin; + __set_sentinel(__new_end); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT { + // Size-based __split_buffers track their size directly: we need to explicitly update the size + // when the front is adjusted. 
+ __size_ -= __new_begin - __begin_; + __begin_ = __new_begin; + __set_sentinel(__new_size); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT { + _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_"); + __size_ += __new_end - end(); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT { + __size_ = __new_size; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT { + __cap_ = __new_capacity; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT { + __cap_ = __new_capacity - __begin_; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT { + return static_cast(__begin_ - __front_cap_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT { + // `__cap_ - __end_` tells us the total number of spares when in size-mode. We need to remove + // the __front_spare from the count. 
+ return __cap_ - __size_ - __front_spare(); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return __begin_[__size_ - 1]; } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { + return __begin_[__size_ - 1]; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator( + __split_buffer_pointer_layout<__split_buffer, + value_type, + allocator_type>& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__cap_, __other.__cap_); + std::swap(__size_, __other.__size_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_size_layout& __other) _NOEXCEPT { + std::swap(__front_cap_, __other.__front_cap_); + std::swap(__begin_, __other.__begin_); + std::swap(__cap_, __other.__cap_); + std::swap(__size_, __other.__size_); + std::__swap_allocator(__alloc_, __other.__alloc_); + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT { + __front_cap_ = nullptr; + __begin_ = nullptr; + __size_ = 0; + __cap_ = 0; + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __copy_without_alloc(__split_buffer_size_layout const& __other) + _NOEXCEPT_(is_nothrow_copy_assignable::value) { + __front_cap_ = __other.__front_cap_; + __begin_ = __other.__begin_; + __cap_ = __other.__cap_; + __size_ = __other.__size_; + } + +private: + pointer __front_cap_ = nullptr; + pointer __begin_ = nullptr; + size_type __size_ = 0; + size_type __cap_ = 0; + _LIBCPP_NO_UNIQUE_ADDRESS allocator_type __alloc_; + + template + friend class __split_buffer_size_layout; +}; + +// `__split_buffer` is a contiguous array data structure. It may hold spare capacity at both ends of +// the sequence. This allows for a `__split_buffer` to grow from both the front and the back without +// relocating its contents until it runs out of room. 
This characteristic sets it apart from +// `std::vector`, which only holds spare capacity at its end. As such, `__split_buffer` is useful +// for implementing both `std::vector` and `std::deque`. +// +// The sequence is stored as a contiguous chunk of memory delimited by the following "pointers" (`o` denotes +// uninitialized memory and `x` denotes a valid object): +// +// |oooooooooooooooooooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxoooooooooooooooooooooooo| +// ^ ^ ^ ^ +// __front_cap_ __begin_ __end_ __back_cap_ +// +// The range [__front_cap_, __begin_) contains uninitialized memory. It is referred to as the "front spare capacity". +// The range [__begin_, __end_) contains valid objects. It is referred to as the "valid range". +// The range [__end_, __back_cap_) contains uninitialized memory. It is referred to as the "back spare capacity". +// +// The layout of `__split_buffer` is determined by the `_Layout` template template parameter. This +// `_Layout` allows the above pointers to be stored as different representations, such as integer +// offsets. 
A layout class template must provide the following interface: +// +// template +// class __layout { +// protected: +// using value_type = _Tp; +// using allocator_type = _Allocator; +// using __alloc_traits = allocator_traits; +// using reference = value_type&; +// using const_reference = const value_type&; +// using size_type = typename __alloc_traits::size_type; +// using difference_type = typename __alloc_traits::difference_type; +// using pointer = typename __alloc_traits::pointer; +// using const_pointer = typename __alloc_traits::const_pointer; +// using iterator = pointer; +// using const_iterator = const_pointer; +// using __sentinel_type = /* type that represents the layout's sentinel */; +// +// public: +// __layout() = default; +// explicit __layout(const allocator_type&); +// +// pointer __front_cap(); +// const_pointer __front_cap() const; +// +// pointer begin(); +// const_pointer begin() const; +// +// pointer end(); +// pointer end() const; +// +// size_type size() const; +// bool empty() const; +// size_type capacity() const; +// +// allocator_type& __get_allocator(); +// allocator_type const& __get_allocator() const; +// +// __sentinel_type __raw_sentinel() const; +// __sentinel_type __raw_capacity() const; +// +// void __set_data(pointer); +// void __set_valid_range(pointer __begin, pointer __end); +// void __set_valid_range(pointer __begin, size_type __size); +// void __set_sentinel(pointer __end); +// void __set_sentinel(size_type __size); +// +// void __set_capacity(size_type __capacity); +// void __set_capacity(pointer __capacity); +// +// size_type __front_spare() const; +// size_type __back_spare() const; +// +// reference back(); +// const_reference back() const; +// +// template +// void __swap_without_allocator(_OtherLayout&); +// void swap(__layout&); +// +// void __reset(); +// void __copy_without_alloc(__layout const&); +// }; +// +template class _Layout> +class __split_buffer : _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, 
_Allocator> { + using __base_type _LIBCPP_NODEBUG = _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator>; + +public: + using __base_type::__back_spare; + using __base_type::__copy_without_alloc; + using __base_type::__front_cap; + using __base_type::__front_spare; + using __base_type::__get_allocator; + using __base_type::__raw_capacity; + using __base_type::__raw_sentinel; + using __base_type::__reset; + using __base_type::__set_capacity; + using __base_type::__set_data; + using __base_type::__set_sentinel; + using __base_type::__set_valid_range; + + using typename __base_type::__alloc_traits; + using typename __base_type::allocator_type; + using typename __base_type::const_iterator; + using typename __base_type::const_pointer; + using typename __base_type::const_reference; + using typename __base_type::difference_type; + using typename __base_type::iterator; + using typename __base_type::pointer; + using typename __base_type::reference; + using typename __base_type::size_type; + using typename __base_type::value_type; // A __split_buffer contains the following members which may be trivially relocatable: // - pointer: may be trivially relocatable, so it's checked @@ -73,36 +478,24 @@ public: __libcpp_is_trivially_relocatable::value && __libcpp_is_trivially_relocatable::value, __split_buffer, void>; - using __replaceable _LIBCPP_NODEBUG = - __conditional_t<__is_replaceable_v && __container_allocator_is_replaceable<__alloc_traits>::value, - __split_buffer, - void>; - - pointer __first_; - pointer __begin_; - pointer __end_; - _LIBCPP_COMPRESSED_PAIR(pointer, __cap_, allocator_type, __alloc_); __split_buffer(const __split_buffer&) = delete; __split_buffer& operator=(const __split_buffer&) = delete; - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer() - _NOEXCEPT_(is_nothrow_default_constructible::value) - : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr) {} + _LIBCPP_HIDE_FROM_ABI __split_buffer() = default; - 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a) - : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {} + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(allocator_type& __a) : __base_type(__a) {} - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const __alloc_rr& __a) - : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {} + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const allocator_type& __a) + : __base_type(__a) {} _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI - __split_buffer(size_type __cap, size_type __start, __alloc_rr& __a); + __split_buffer(size_type __cap, size_type __start, allocator_type& __a); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c) _NOEXCEPT_(is_nothrow_move_constructible::value); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c, const __alloc_rr& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c, const allocator_type& __a); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer& operator=(__split_buffer&& __c) _NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value && @@ -111,36 +504,16 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~__split_buffer(); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __begin_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __begin_; } + using __base_type::back; + using __base_type::begin; + using __base_type::capacity; + using __base_type::empty; + using __base_type::end; + using __base_type::size; - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __end_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 
_LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __end_; } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(__begin_); } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const { - return static_cast(__end_ - __begin_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const { return __end_ == __begin_; } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const { - return static_cast(__cap_ - __first_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const { - return static_cast(__begin_ - __first_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const { - return static_cast(__cap_ - __end_); - } - - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *__begin_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *__begin_; } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() { return *(__end_ - 1); } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const { return *(__end_ - 1); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(begin()); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *begin(); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *begin(); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT; @@ -149,8 +522,8 @@ public: template _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... 
__args); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(__begin_ + 1); } - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(__end_ - 1); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(begin() + 1); } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(end() - 1); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n); _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n, const_reference __x); @@ -182,244 +555,242 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __destruct_at_end(pointer __new_last, true_type) _NOEXCEPT; _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer& __x) - _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>); + _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v); - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const; + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const { + if (__front_cap() == nullptr) { + if (begin() != nullptr) + return false; + + if (!empty()) + return false; + + if (capacity() != 0) + return false; + + return true; + } else { + if (begin() < __front_cap()) + return false; + + if (capacity() < size()) + return false; + + if (end() < begin()) + return false; + + return true; + } + } + + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void + __swap_without_allocator(__split_buffer& __other) _NOEXCEPT { + __base_type::__swap_without_allocator(__other); + } private: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable::value) { - __alloc_ = std::move(__c.__alloc_); + __get_allocator() = 
std::move(__c.__get_allocator()); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT {} struct _ConstructTransaction { _LIBCPP_CONSTEXPR_SINCE_CXX20 - _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT - : __pos_(*__p), - __end_(*__p + __n), - __dest_(__p) {} + _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(__split_buffer* __parent, pointer __p, size_type __n) _NOEXCEPT + : __pos_(__p), + __end_(__p + __n), + __parent_(__parent) {} - _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { *__dest_ = __pos_; } + _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { __parent_->__set_sentinel(__pos_); } pointer __pos_; const pointer __end_; private: - pointer* __dest_; + __split_buffer* __parent_; }; + + template class _L2> + friend class __split_buffer; }; -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 bool __split_buffer<_Tp, _Allocator>::__invariants() const { - if (__first_ == nullptr) { - if (__begin_ != nullptr) - return false; - if (__end_ != nullptr) - return false; - if (__cap_ != nullptr) - return false; - } else { - if (__begin_ < __first_) - return false; - if (__end_ < __begin_) - return false; - if (__cap_ < __end_) - return false; - } - return true; -} - -// Default constructs __n objects starting at __end_ +// Default constructs __n objects starting at `end()` // throws if construction throws // Precondition: __n > 0 // Precondition: size() + __n <= capacity() // Postcondition: size() == size() + __n -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n) { - _ConstructTransaction __tx(std::addressof(this->__end_), __n); +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n) { + _ConstructTransaction __tx(this, end(), __n); for (; __tx.__pos_ != __tx.__end_; 
++__tx.__pos_) { - __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_)); + __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_)); } } -// Copy constructs __n objects starting at __end_ from __x +// Copy constructs __n objects starting at `end()` from __x // throws if construction throws // Precondition: __n > 0 // Precondition: size() + __n <= capacity() // Postcondition: size() == old size() + __n // Postcondition: [i] == __x for all i in [size() - __n, __n) -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x) { - _ConstructTransaction __tx(std::addressof(this->__end_), __n); +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n, const_reference __x) { + _ConstructTransaction __tx(this, end(), __n); for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { - __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), __x); + __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), __x); } } -template +template class _Layout> template _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) { - __alloc_rr& __a = __alloc_; +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) { + allocator_type& __a = __get_allocator(); for (; __first != __last; ++__first) { - if (__end_ == __cap_) { - size_type __old_cap = __cap_ - __first_; + if (__back_spare() == 0) { + size_type __old_cap = capacity(); size_type __new_cap = std::max(2 * __old_cap, 8); __split_buffer __buf(__new_cap, 0, __a); - for (pointer __p = __begin_; __p != __end_; ++__p, (void)++__buf.__end_) - __alloc_traits::construct(__buf.__alloc_, std::__to_address(__buf.__end_), std::move(*__p)); + pointer __buf_end = __buf.end(); + pointer __end = end(); + for (pointer __p = begin(); __p != 
__end; ++__p) { + __alloc_traits::construct(__buf.__get_allocator(), std::__to_address(__buf_end), std::move(*__p)); + __buf.__set_sentinel(++__buf_end); + } swap(__buf); } - __alloc_traits::construct(__a, std::__to_address(this->__end_), *__first); - ++this->__end_; + + __alloc_traits::construct(__a, std::__to_address(end()), *__first); + __set_sentinel(size() + 1); } } -template + +template class _Layout> template ::value, int> > _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) { +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) { __construct_at_end_with_size(__first, std::distance(__first, __last)); } -template +template class _Layout> template _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__split_buffer<_Tp, _Allocator>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) { - _ConstructTransaction __tx(std::addressof(this->__end_), __n); +__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) { + _ConstructTransaction __tx(this, end(), __n); for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void)++__first) { - __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), *__first); + __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), *__first); } } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline void -__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, false_type) { - while (__begin_ != __new_begin) - __alloc_traits::destroy(__alloc_, std::__to_address(__begin_++)); +__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, false_type) { + pointer __begin = begin(); + // Updating begin at every iteration is unnecessary because destruction can't throw. 
+ while (__begin != __new_begin) + __alloc_traits::destroy(__get_allocator(), std::__to_address(__begin++)); + __set_valid_range(__begin, end()); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline void -__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, true_type) { - __begin_ = __new_begin; +__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, true_type) { + __set_valid_range(__new_begin, end()); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void -__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT { - while (__new_last != __end_) - __alloc_traits::destroy(__alloc_, std::__to_address(--__end_)); +__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT { + pointer __end = end(); + // Updating begin at every iteration is unnecessary because destruction can't throw. + while (__new_last != __end) + __alloc_traits::destroy(__get_allocator(), std::__to_address(--__end)); + __set_sentinel(__end); } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void -__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, true_type) _NOEXCEPT { - __end_ = __new_last; -} - -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 -__split_buffer<_Tp, _Allocator>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a) - : __cap_(nullptr), __alloc_(__a) { - if (__cap == 0) { - __first_ = nullptr; - } else { - auto __allocation = std::__allocate_at_least(__alloc_, __cap); - __first_ = __allocation.ptr; - __cap = __allocation.count; +__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(size_type __cap, size_type __start, allocator_type& __a) + : __base_type(__a) { + _LIBCPP_ASSERT_INTERNAL(__cap >= __start, "can't have a start point outside the capacity"); + if (__cap > 0) { + auto __allocation = 
std::__allocate_at_least(__get_allocator(), __cap); + __set_data(__allocation.ptr); + __cap = __allocation.count; } - __begin_ = __end_ = __first_ + __start; - __cap_ = __first_ + __cap; + + pointer __begin = __front_cap() + __start; + __set_valid_range(__begin, __begin); + __set_capacity(__cap); } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::~__split_buffer() { +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::~__split_buffer() { clear(); - if (__first_) - __alloc_traits::deallocate(__alloc_, __first_, capacity()); + if (__front_cap()) + __alloc_traits::deallocate(__get_allocator(), __front_cap(), capacity()); } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c) +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c) _NOEXCEPT_(is_nothrow_move_constructible::value) - : __first_(std::move(__c.__first_)), - __begin_(std::move(__c.__begin_)), - __end_(std::move(__c.__end_)), - __cap_(std::move(__c.__cap_)), - __alloc_(std::move(__c.__alloc_)) { - __c.__first_ = nullptr; - __c.__begin_ = nullptr; - __c.__end_ = nullptr; - __c.__cap_ = nullptr; + : __base_type(std::move(__c)) { + __c.__reset(); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 -__split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a) - : __cap_(nullptr), __alloc_(__a) { - if (__a == __c.__alloc_) { - __first_ = __c.__first_; - __begin_ = __c.__begin_; - __end_ = __c.__end_; - __cap_ = __c.__cap_; - __c.__first_ = nullptr; - __c.__begin_ = nullptr; - __c.__end_ = nullptr; - __c.__cap_ = nullptr; +__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c, const allocator_type& __a) + : __base_type(__a) { + if (__a == __c.__get_allocator()) { + __set_data(__c.__front_cap()); + __set_valid_range(__c.begin(), __c.end()); + 
__set_capacity(__c.capacity()); + __c.__reset(); } else { - auto __allocation = std::__allocate_at_least(__alloc_, __c.size()); - __first_ = __allocation.ptr; - __begin_ = __end_ = __first_; - __cap_ = __first_ + __allocation.count; + auto __allocation = std::__allocate_at_least(__get_allocator(), __c.size()); + __set_data(__allocation.ptr); + __set_valid_range(__front_cap(), __front_cap()); + __set_capacity(__allocation.count); typedef move_iterator _Ip; __construct_at_end(_Ip(__c.begin()), _Ip(__c.end())); } } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>& -__split_buffer<_Tp, _Allocator>::operator=(__split_buffer&& __c) +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>& +__split_buffer<_Tp, _Allocator, _Layout>::operator=(__split_buffer&& __c) _NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value && is_nothrow_move_assignable::value) || !__alloc_traits::propagate_on_container_move_assignment::value) { clear(); shrink_to_fit(); - __first_ = __c.__first_; - __begin_ = __c.__begin_; - __end_ = __c.__end_; - __cap_ = __c.__cap_; + __copy_without_alloc(__c); __move_assign_alloc(__c, integral_constant()); - __c.__first_ = __c.__begin_ = __c.__end_ = __c.__cap_ = nullptr; + __c.__reset(); return *this; } -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::swap(__split_buffer& __x) - _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>) { - std::swap(__first_, __x.__first_); - std::swap(__begin_, __x.__begin_); - std::swap(__end_, __x.__end_); - std::swap(__cap_, __x.__cap_); - std::__swap_allocator(__alloc_, __x.__alloc_); +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::swap(__split_buffer& __x) + _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v) { + __base_type::swap(__x); } -template 
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT { +template class _Layout> +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::shrink_to_fit() _NOEXCEPT { if (capacity() > size()) { #if _LIBCPP_HAS_EXCEPTIONS try { #endif // _LIBCPP_HAS_EXCEPTIONS - __split_buffer __t(size(), 0, __alloc_); + __split_buffer __t(size(), 0, __get_allocator()); if (__t.capacity() < capacity()) { - __t.__construct_at_end(move_iterator(__begin_), move_iterator(__end_)); - __t.__end_ = __t.__begin_ + (__end_ - __begin_); - std::swap(__first_, __t.__first_); - std::swap(__begin_, __t.__begin_); - std::swap(__end_, __t.__end_); - std::swap(__cap_, __t.__cap_); + __t.__construct_at_end(move_iterator(begin()), move_iterator(end())); + __t.__set_sentinel(size()); + __swap_without_allocator(__t); } #if _LIBCPP_HAS_EXCEPTIONS } catch (...) { @@ -428,55 +799,56 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fi } } -template +template class _Layout> template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_front(_Args&&... __args) { - if (__begin_ == __first_) { - if (__end_ < __cap_) { - difference_type __d = __cap_ - __end_; +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_front(_Args&&... __args) { + if (__front_spare() == 0) { + pointer __end = end(); + if (__back_spare() > 0) { + // The elements are pressed up against the front of the buffer: we need to move them back a + // little bit to make `emplace_front` have amortised O(1) complexity. 
+ difference_type __d = __back_spare(); __d = (__d + 1) / 2; - __begin_ = std::move_backward(__begin_, __end_, __end_ + __d); - __end_ += __d; + auto __new_end = __end + __d; + __set_valid_range(std::move_backward(begin(), __end, __new_end), __new_end); } else { - size_type __c = std::max(2 * static_cast(__cap_ - __first_), 1); - __split_buffer __t(__c, (__c + 3) / 4, __alloc_); - __t.__construct_at_end(move_iterator(__begin_), move_iterator(__end_)); - std::swap(__first_, __t.__first_); - std::swap(__begin_, __t.__begin_); - std::swap(__end_, __t.__end_); - std::swap(__cap_, __t.__cap_); + size_type __c = std::max(2 * capacity(), 1); + __split_buffer __t(__c, (__c + 3) / 4, __get_allocator()); + __t.__construct_at_end(move_iterator(begin()), move_iterator(__end)); + __base_type::__swap_without_allocator(__t); } } - __alloc_traits::construct(__alloc_, std::__to_address(__begin_ - 1), std::forward<_Args>(__args)...); - --__begin_; + + __alloc_traits::construct(__get_allocator(), std::__to_address(begin() - 1), std::forward<_Args>(__args)...); + __set_valid_range(begin() - 1, size() + 1); } -template +template class _Layout> template -_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_back(_Args&&... __args) { - if (__end_ == __cap_) { - if (__begin_ > __first_) { - difference_type __d = __begin_ - __first_; +_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_back(_Args&&... 
__args) { + pointer __end = end(); + if (__back_spare() == 0) { + if (__front_spare() > 0) { + difference_type __d = __front_spare(); __d = (__d + 1) / 2; - __end_ = std::move(__begin_, __end_, __begin_ - __d); - __begin_ -= __d; + __end = std::move(begin(), __end, begin() - __d); + __set_valid_range(begin() - __d, __end); } else { - size_type __c = std::max(2 * static_cast(__cap_ - __first_), 1); - __split_buffer __t(__c, __c / 4, __alloc_); - __t.__construct_at_end(move_iterator(__begin_), move_iterator(__end_)); - std::swap(__first_, __t.__first_); - std::swap(__begin_, __t.__begin_); - std::swap(__end_, __t.__end_); - std::swap(__cap_, __t.__cap_); + size_type __c = std::max(2 * capacity(), 1); + __split_buffer __t(__c, __c / 4, __get_allocator()); + __t.__construct_at_end(move_iterator(begin()), move_iterator(__end)); + __base_type::__swap_without_allocator(__t); } } - __alloc_traits::construct(__alloc_, std::__to_address(__end_), std::forward<_Args>(__args)...); - ++__end_; + + __alloc_traits::construct(__get_allocator(), std::__to_address(__end), std::forward<_Args>(__args)...); + __set_sentinel(++__end); } -template +template class _Layout> _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void -swap(__split_buffer<_Tp, _Allocator>& __x, __split_buffer<_Tp, _Allocator>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) { +swap(__split_buffer<_Tp, _Allocator, _Layout>& __x, __split_buffer<_Tp, _Allocator, _Layout>& __y) + _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) { __x.swap(__y); } diff --git a/lib/libcxx/include/__stop_token/atomic_unique_lock.h b/lib/libcxx/include/__stop_token/atomic_unique_lock.h index 05e8f22316..4b0ae05ca8 100644 --- a/lib/libcxx/include/__stop_token/atomic_unique_lock.h +++ b/lib/libcxx/include/__stop_token/atomic_unique_lock.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // where State contains a lock bit and might contain other data, // and LockedBit is the value of State when the lock bit is set, e.g 1 << 2 template -class 
_LIBCPP_AVAILABILITY_SYNC __atomic_unique_lock { +class __atomic_unique_lock { static_assert(std::__popcount(static_cast(_LockedBit)) == 1, "LockedBit must be an integer where only one bit is set"); diff --git a/lib/libcxx/include/__stop_token/stop_callback.h b/lib/libcxx/include/__stop_token/stop_callback.h index a4d7a29953..76d438e096 100644 --- a/lib/libcxx/include/__stop_token/stop_callback.h +++ b/lib/libcxx/include/__stop_token/stop_callback.h @@ -34,7 +34,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_THREADS template -class _LIBCPP_AVAILABILITY_SYNC stop_callback : private __stop_callback_base { +class stop_callback : private __stop_callback_base { static_assert(invocable<_Callback>, "Mandates: stop_callback is instantiated with an argument for the template parameter Callback that " "satisfies invocable."); @@ -91,7 +91,7 @@ private: }; template -_LIBCPP_AVAILABILITY_SYNC stop_callback(stop_token, _Callback) -> stop_callback<_Callback>; +stop_callback(stop_token, _Callback) -> stop_callback<_Callback>; #endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_THREADS diff --git a/lib/libcxx/include/__stop_token/stop_source.h b/lib/libcxx/include/__stop_token/stop_source.h index 85d67efe06..aea9429388 100644 --- a/lib/libcxx/include/__stop_token/stop_source.h +++ b/lib/libcxx/include/__stop_token/stop_source.h @@ -30,7 +30,7 @@ struct nostopstate_t { inline constexpr nostopstate_t nostopstate{}; -class _LIBCPP_AVAILABILITY_SYNC stop_source { +class stop_source { public: _LIBCPP_HIDE_FROM_ABI stop_source() : __state_(new __stop_state()) { __state_->__increment_stop_source_counter(); } diff --git a/lib/libcxx/include/__stop_token/stop_state.h b/lib/libcxx/include/__stop_token/stop_state.h index cc1f1d830e..74fafbdc63 100644 --- a/lib/libcxx/include/__stop_token/stop_state.h +++ b/lib/libcxx/include/__stop_token/stop_state.h @@ -100,7 +100,7 @@ public: return ((__curent_state & __stop_requested_bit) != 0) || ((__curent_state >> 
__stop_source_counter_shift) != 0); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __request_stop() noexcept { + _LIBCPP_HIDE_FROM_ABI bool __request_stop() noexcept { auto __cb_list_lock = __try_lock_for_request_stop(); if (!__cb_list_lock.__owns_lock()) { return false; @@ -137,7 +137,7 @@ public: return true; } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __add_callback(__stop_callback_base* __cb) noexcept { + _LIBCPP_HIDE_FROM_ABI bool __add_callback(__stop_callback_base* __cb) noexcept { // If it is already stop_requested. Do not try to request it again. const auto __give_up_trying_to_lock_condition = [__cb](__state_t __state) { if ((__state & __stop_requested_bit) != 0) { @@ -164,7 +164,7 @@ public: } // called by the destructor of stop_callback - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __remove_callback(__stop_callback_base* __cb) noexcept { + _LIBCPP_HIDE_FROM_ABI void __remove_callback(__stop_callback_base* __cb) noexcept { __callback_list_lock __cb_list_lock(__state_); // under below condition, the request_stop call just popped __cb from the list and could execute it now @@ -192,7 +192,7 @@ public: } private: - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI __callback_list_lock __try_lock_for_request_stop() noexcept { + _LIBCPP_HIDE_FROM_ABI __callback_list_lock __try_lock_for_request_stop() noexcept { // If it is already stop_requested, do not try to request stop or lock the list again. 
const auto __lock_fail_condition = [](__state_t __state) { return (__state & __stop_requested_bit) != 0; }; diff --git a/lib/libcxx/include/__stop_token/stop_token.h b/lib/libcxx/include/__stop_token/stop_token.h index 178b1728c3..4a6ca27ac4 100644 --- a/lib/libcxx/include/__stop_token/stop_token.h +++ b/lib/libcxx/include/__stop_token/stop_token.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_THREADS -class _LIBCPP_AVAILABILITY_SYNC stop_token { +class stop_token { public: _LIBCPP_HIDE_FROM_ABI stop_token() noexcept = default; diff --git a/lib/libcxx/include/__string/char_traits.h b/lib/libcxx/include/__string/char_traits.h index 86c92477cb..d98595030e 100644 --- a/lib/libcxx/include/__string/char_traits.h +++ b/lib/libcxx/include/__string/char_traits.h @@ -94,16 +94,17 @@ struct char_traits { } // TODO: Make this _LIBCPP_HIDE_FROM_ABI - static inline _LIBCPP_HIDDEN _LIBCPP_CONSTEXPR bool eq(char_type __c1, char_type __c2) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDDEN _LIBCPP_CONSTEXPR bool eq(char_type __c1, char_type __c2) _NOEXCEPT { return __c1 == __c2; } - static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool lt(char_type __c1, char_type __c2) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool + lt(char_type __c1, char_type __c2) _NOEXCEPT { return (unsigned char)__c1 < (unsigned char)__c2; } // __constexpr_memcmp requires a trivially lexicographically comparable type, but char is not when char is a signed // type - static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int + [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int compare(const char_type* __lhs, const char_type* __rhs, size_t __count) _NOEXCEPT { if (__libcpp_is_constant_evaluated()) { #ifdef _LIBCPP_COMPILER_CLANG_BASED @@ -126,11 +127,12 @@ struct char_traits { } } - static inline _LIBCPP_HIDE_FROM_ABI size_t _LIBCPP_CONSTEXPR_SINCE_CXX17 length(const char_type* 
__s) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI size_t _LIBCPP_CONSTEXPR_SINCE_CXX17 + length(const char_type* __s) _NOEXCEPT { return std::__constexpr_strlen(__s); } - static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* + [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT { return std::__constexpr_memchr(__s, __a, __n); } @@ -154,19 +156,24 @@ struct char_traits { return __s; } - static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT { return eq_int_type(__c, eof()) ? ~eof() : __c; } - static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR char_type to_char_type(int_type __c) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR char_type + to_char_type(int_type __c) _NOEXCEPT { return char_type(__c); } - static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type to_int_type(char_type __c) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type + to_int_type(char_type __c) _NOEXCEPT { return int_type((unsigned char)__c); } - static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool eq_int_type(int_type __c1, int_type __c2) _NOEXCEPT { + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool + eq_int_type(int_type __c1, int_type __c2) _NOEXCEPT { return __c1 == __c2; } - static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { return int_type(EOF); } + [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { + return int_type(EOF); + } }; template @@ -187,11 +194,11 @@ struct __char_traits_base { __lhs = __rhs; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool eq(char_type __lhs, 
char_type __rhs) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool eq(char_type __lhs, char_type __rhs) _NOEXCEPT { return __lhs == __rhs; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool lt(char_type __lhs, char_type __rhs) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool lt(char_type __lhs, char_type __rhs) _NOEXCEPT { return __lhs < __rhs; } @@ -213,19 +220,22 @@ struct __char_traits_base { return __str; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR char_type to_char_type(int_type __c) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR char_type to_char_type(int_type __c) _NOEXCEPT { return char_type(__c); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type to_int_type(char_type __c) _NOEXCEPT { return int_type(__c); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type to_int_type(char_type __c) _NOEXCEPT { + return int_type(__c); + } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool eq_int_type(int_type __lhs, int_type __rhs) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool + eq_int_type(int_type __lhs, int_type __rhs) _NOEXCEPT { return __lhs == __rhs; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { return _EOFVal; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { return _EOFVal; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT { return eq_int_type(__c, eof()) ? 
static_cast(~eof()) : __c; } }; @@ -235,18 +245,19 @@ struct __char_traits_base { #if _LIBCPP_HAS_WIDE_CHARACTERS template <> struct char_traits : __char_traits_base(WEOF)> { - static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int + [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT { if (__n == 0) return 0; return std::__constexpr_wmemcmp(__s1, __s2, __n); } - static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t length(const char_type* __s) _NOEXCEPT { + [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t + length(const char_type* __s) _NOEXCEPT { return std::__constexpr_wcslen(__s); } - static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* + [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT { return std::__constexpr_wmemchr(__s, __a, __n); } @@ -257,16 +268,16 @@ struct char_traits : __char_traits_base struct char_traits : __char_traits_base(EOF)> { - static _LIBCPP_HIDE_FROM_ABI constexpr int + [[nodiscard]] static _LIBCPP_HIDE_FROM_ABI constexpr int compare(const char_type* __s1, const char_type* __s2, size_t __n) noexcept { return std::__constexpr_memcmp(__s1, __s2, __element_count(__n)); } - static _LIBCPP_HIDE_FROM_ABI constexpr size_t length(const char_type* __str) noexcept { + [[nodiscard]] static _LIBCPP_HIDE_FROM_ABI constexpr size_t length(const char_type* __str) noexcept { return std::__constexpr_strlen(__str); } - _LIBCPP_HIDE_FROM_ABI static constexpr const char_type* + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr const char_type* find(const char_type* __s, size_t __n, const char_type& __a) noexcept { return std::__constexpr_memchr(__s, __a, __n); } @@ -276,11 +287,11 @@ struct char_traits : __char_traits_base struct char_traits : 
__char_traits_base(0xFFFF)> { - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t length(const char_type* __s) _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT { __identity __proj; const char_type* __match = std::__find(__s, __s + __n, __a, __proj); @@ -290,7 +301,7 @@ struct char_traits : __char_traits_base::compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT { for (; __n; --__n, ++__s1, ++__s2) { if (lt(*__s1, *__s2)) @@ -301,7 +312,8 @@ char_traits::compare(const char_type* __s1, const char_type* __s2, siz return 0; } -inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t char_traits::length(const char_type* __s) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t +char_traits::length(const char_type* __s) _NOEXCEPT { size_t __len = 0; for (; !eq(*__s, char_type(0)); ++__s) ++__len; @@ -310,11 +322,11 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t char_traits::length(const template <> struct char_traits : __char_traits_base(0xFFFFFFFF)> { - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t length(const char_type* __s) _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type* find(const char_type* __s, size_t __n, const char_type& 
__a) _NOEXCEPT { __identity __proj; const char_type* __match = std::__find(__s, __s + __n, __a, __proj); @@ -324,7 +336,7 @@ struct char_traits : __char_traits_base::compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT { for (; __n; --__n, ++__s1, ++__s2) { if (lt(*__s1, *__s2)) @@ -335,7 +347,8 @@ char_traits::compare(const char_type* __s1, const char_type* __s2, siz return 0; } -inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t char_traits::length(const char_type* __s) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t +char_traits::length(const char_type* __s) _NOEXCEPT { size_t __len = 0; for (; !eq(*__s, char_type(0)); ++__s) ++__len; @@ -369,6 +382,13 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 const _CharT* __searc if (__len1 < __len2) return __last1; + if (__builtin_constant_p(__len2 == 1) && __len2 == 1) { + auto __res = _Traits::find(__first1, __len1, *__first2); + if (__res == nullptr) + return __last1; + return __res; + } + // First element of __first2 is loop invariant. 
_CharT __f2 = *__first2; while (true) { diff --git a/lib/libcxx/include/__string/constexpr_c_functions.h b/lib/libcxx/include/__string/constexpr_c_functions.h index 119669e16b..4b05e862b8 100644 --- a/lib/libcxx/include/__string/constexpr_c_functions.h +++ b/lib/libcxx/include/__string/constexpr_c_functions.h @@ -22,7 +22,6 @@ #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_same.h> -#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/is_trivially_lexicographically_comparable.h> #include <__type_traits/remove_cv.h> #include <__utility/element_count.h> @@ -96,14 +95,13 @@ __constexpr_memcmp(const _Tp* __lhs, const _Up* __rhs, __element_count __n) { } } -// Because of __libcpp_is_trivially_equality_comparable we know that comparing the object representations is equivalent +// Because of __is_trivially_equality_comparable_v we know that comparing the object representations is equivalent // to a std::memcmp(...) == 0. Since we have multiple objects contiguously in memory, we can call memcmp once instead // of invoking it on every object individually. 
template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __constexpr_memcmp_equal(const _Tp* __lhs, const _Up* __rhs, __element_count __n) { - static_assert(__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, - "_Tp and _Up have to be trivially equality comparable"); + static_assert(__is_trivially_equality_comparable_v<_Tp, _Up>, "_Tp and _Up have to be trivially equality comparable"); auto __count = static_cast(__n); @@ -128,7 +126,7 @@ __constexpr_memcmp_equal(const _Tp* __lhs, const _Up* __rhs, __element_count __n template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __constexpr_memchr(_Tp* __str, _Up __value, size_t __count) { - static_assert(sizeof(_Tp) == 1 && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, + static_assert(sizeof(_Tp) == 1 && __is_trivially_equality_comparable_v<_Tp, _Up>, "Calling memchr on non-trivially equality comparable types is unsafe."); if (__libcpp_is_constant_evaluated()) { @@ -225,6 +223,8 @@ __constexpr_memmove(_Tp* __dest, _Up* __src, __element_count __n) { std::__assign_trivially_copyable(__dest[__i], __src[__i]); } } + } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) == __datasizeof_v<_Tp>) { + ::__builtin_memmove(__dest, __src, __count * sizeof(_Tp)); } else if (__count > 0) { ::__builtin_memmove(__dest, __src, (__count - 1) * sizeof(_Tp) + __datasizeof_v<_Tp>); } diff --git a/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h b/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h index 5275aead35..90bd59d36c 100644 --- a/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h +++ b/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h @@ -34,12 +34,4 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __ return ::strtold(__nptr, __endptr); } -inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoll(__nptr, __endptr, __base); -} - -inline _LIBCPP_HIDE_FROM_ABI unsigned long 
long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) { - return ::strtoull(__nptr, __endptr, __base); -} - #endif // _LIBCPP___SUPPORT_XLOCALE_STRTONUM_FALLBACK_H diff --git a/lib/libcxx/include/__system_error/error_category.h b/lib/libcxx/include/__system_error/error_category.h index 7233e22110..7f7c7355c7 100644 --- a/lib/libcxx/include/__system_error/error_category.h +++ b/lib/libcxx/include/__system_error/error_category.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -class _LIBCPP_EXPORTED_FROM_ABI error_condition; +class error_condition; class _LIBCPP_EXPORTED_FROM_ABI error_code; class _LIBCPP_HIDDEN __do_message; @@ -37,11 +37,11 @@ public: error_category(const error_category&) = delete; error_category& operator=(const error_category&) = delete; - virtual const char* name() const _NOEXCEPT = 0; - virtual error_condition default_error_condition(int __ev) const _NOEXCEPT; - virtual bool equivalent(int __code, const error_condition& __condition) const _NOEXCEPT; - virtual bool equivalent(const error_code& __code, int __condition) const _NOEXCEPT; - virtual string message(int __ev) const = 0; + [[__nodiscard__]] virtual const char* name() const _NOEXCEPT = 0; + [[__nodiscard__]] virtual error_condition default_error_condition(int __ev) const _NOEXCEPT; + [[__nodiscard__]] virtual bool equivalent(int __code, const error_condition& __condition) const _NOEXCEPT; + [[__nodiscard__]] virtual bool equivalent(const error_code& __code, int __condition) const _NOEXCEPT; + [[__nodiscard__]] virtual string message(int __ev) const = 0; _LIBCPP_HIDE_FROM_ABI bool operator==(const error_category& __rhs) const _NOEXCEPT { return this == &__rhs; } @@ -67,8 +67,8 @@ public: string message(int __ev) const override; }; -[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& generic_category() _NOEXCEPT; -[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& system_category() _NOEXCEPT; +[[__gnu__::__const__]] 
[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& generic_category() _NOEXCEPT; +[[__gnu__::__const__]] [[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& system_category() _NOEXCEPT; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__system_error/error_code.h b/lib/libcxx/include/__system_error/error_code.h index f6ea40d6ef..e904376939 100644 --- a/lib/libcxx/include/__system_error/error_code.h +++ b/lib/libcxx/include/__system_error/error_code.h @@ -71,20 +71,20 @@ public: __cat_ = &system_category(); } - _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; } - _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; } - _LIBCPP_HIDE_FROM_ABI error_condition default_error_condition() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI error_condition default_error_condition() const _NOEXCEPT { return __cat_->default_error_condition(__val_); } - string message() const; + [[__nodiscard__]] string message() const; _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __val_ != 0; } }; -inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(errc __e) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(errc __e) _NOEXCEPT { return error_code(static_cast(__e), generic_category()); } diff --git a/lib/libcxx/include/__system_error/error_condition.h b/lib/libcxx/include/__system_error/error_condition.h index 34819f4b6d..be7deaba04 100644 --- a/lib/libcxx/include/__system_error/error_condition.h +++ b/lib/libcxx/include/__system_error/error_condition.h @@ -80,15 +80,15 @@ public: __cat_ = &generic_category(); } - _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int 
value() const _NOEXCEPT { return __val_; } - _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; } - string message() const; + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; } + [[__nodiscard__]] string message() const; _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __val_ != 0; } }; -inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(errc __e) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(errc __e) _NOEXCEPT { return error_condition(static_cast(__e), generic_category()); } diff --git a/lib/libcxx/include/__system_error/system_error.h b/lib/libcxx/include/__system_error/system_error.h index 36ccf94cc0..74427d8f0b 100644 --- a/lib/libcxx/include/__system_error/system_error.h +++ b/lib/libcxx/include/__system_error/system_error.h @@ -36,7 +36,7 @@ public: _LIBCPP_HIDE_FROM_ABI system_error(const system_error&) _NOEXCEPT = default; ~system_error() _NOEXCEPT override; - _LIBCPP_HIDE_FROM_ABI const error_code& code() const _NOEXCEPT { return __ec_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_code& code() const _NOEXCEPT { return __ec_; } }; // __ev is expected to be an error in the generic_category domain (e.g. 
from diff --git a/lib/libcxx/include/__thread/id.h b/lib/libcxx/include/__thread/id.h index c9c86c80c8..14a51fc9ee 100644 --- a/lib/libcxx/include/__thread/id.h +++ b/lib/libcxx/include/__thread/id.h @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_HAS_THREADS -class _LIBCPP_EXPORTED_FROM_ABI __thread_id; +class __thread_id; namespace this_thread { diff --git a/lib/libcxx/include/__thread/jthread.h b/lib/libcxx/include/__thread/jthread.h index 7289b835d3..481ffe296c 100644 --- a/lib/libcxx/include/__thread/jthread.h +++ b/lib/libcxx/include/__thread/jthread.h @@ -36,7 +36,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -class _LIBCPP_AVAILABILITY_SYNC jthread { +class jthread { public: // types using id = thread::id; diff --git a/lib/libcxx/include/__thread/poll_with_backoff.h b/lib/libcxx/include/__thread/poll_with_backoff.h index 4f961fe3f7..e007e7746c 100644 --- a/lib/libcxx/include/__thread/poll_with_backoff.h +++ b/lib/libcxx/include/__thread/poll_with_backoff.h @@ -22,33 +22,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD static _LIBCPP_CONSTEXPR const int __libcpp_polling_count = 64; +enum class __backoff_results : unsigned char { + __continue_poll = 1, + __poll_success = 2, + __timeout = 3, + __backoff_failure = 4, +}; + +enum class __poll_with_backoff_results : unsigned char { + __poll_success = static_cast(__backoff_results::__poll_success), + __timeout = static_cast(__backoff_results::__timeout), + __backoff_failure = static_cast(__backoff_results::__backoff_failure), +}; + // Polls a thread for a condition given by a predicate, and backs off based on a backoff policy // before polling again. // // - __poll is the "test function" that should return true if polling succeeded, and false if it failed. // // - __backoff is the "backoff policy", which is called with the duration since we started polling. It should -// return false in order to resume polling, and true if polling should stop entirely for some reason. 
+// return __backoff_results::__continue_poll in order to resume polling, and other appropriate __backoff_results +// if polling should stop entirely for some reason. // In general, backoff policies sleep for some time before returning control to the polling loop. // // - __max_elapsed is the maximum duration to try polling for. If the maximum duration is exceeded, -// the polling loop will return false to report a timeout. +// the polling loop will return __poll_with_backoff_results::__timeout to report a timeout. + template -_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __libcpp_thread_poll_with_backoff( +_LIBCPP_HIDE_FROM_ABI __poll_with_backoff_results __libcpp_thread_poll_with_backoff( _Poll&& __poll, _Backoff&& __backoff, chrono::nanoseconds __max_elapsed = chrono::nanoseconds::zero()) { auto const __start = chrono::high_resolution_clock::now(); for (int __count = 0;;) { if (__poll()) - return true; // __poll completion means success + return __poll_with_backoff_results::__poll_success; if (__count < __libcpp_polling_count) { __count += 1; continue; } chrono::nanoseconds const __elapsed = chrono::high_resolution_clock::now() - __start; if (__max_elapsed != chrono::nanoseconds::zero() && __max_elapsed < __elapsed) - return false; // timeout failure - if (__backoff(__elapsed)) - return false; // __backoff completion means failure + return __poll_with_backoff_results::__timeout; + if (auto __backoff_res = __backoff(__elapsed); __backoff_res == __backoff_results::__continue_poll) + continue; + else + return static_cast<__poll_with_backoff_results>(__backoff_res); } } @@ -59,7 +76,9 @@ _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __libcpp_thread_poll_with_b // so this should most likely only be used on single-threaded systems where there // are no other threads to compete with. 
struct __spinning_backoff_policy { - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator()(chrono::nanoseconds const&) const { return false; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __backoff_results operator()(chrono::nanoseconds const&) const { + return __backoff_results::__continue_poll; + } }; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__thread/support/c11.h b/lib/libcxx/include/__thread/support/c11.h index fe00a2d97f..463c8496ba 100644 --- a/lib/libcxx/include/__thread/support/c11.h +++ b/lib/libcxx/include/__thread/support/c11.h @@ -39,17 +39,17 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_init(__libcpp_recursiv return mtx_init(__m, mtx_plain | mtx_recursive) == thrd_success ? 0 : EINVAL; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m) { return mtx_lock(__m) == thrd_success ? 0 : EINVAL; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m) { return mtx_trylock(__m) == thrd_success; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m) { return mtx_unlock(__m) == thrd_success ? 0 : EINVAL; } @@ -59,15 +59,15 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_destroy(__libcpp_recur return 0; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m) { +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_lock(__libcpp_mutex_t* __m) { return mtx_lock(__m) == thrd_success ? 
0 : EINVAL; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) { +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) { return mtx_trylock(__m) == thrd_success; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) { +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) { return mtx_unlock(__m) == thrd_success ? 0 : EINVAL; } @@ -92,12 +92,12 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_broadcast(__libcpp_condvar_t* return cnd_broadcast(__cv) == thrd_success ? 0 : EINVAL; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m) { return cnd_wait(__cv, __m) == thrd_success ? 0 : EINVAL; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, timespec* __ts) { int __ec = cnd_timedwait(__cv, __m, __ts); return __ec == thrd_timedout ? 
ETIMEDOUT : __ec; diff --git a/lib/libcxx/include/__thread/support/pthread.h b/lib/libcxx/include/__thread/support/pthread.h index 14e92079da..4cf5c03424 100644 --- a/lib/libcxx/include/__thread/support/pthread.h +++ b/lib/libcxx/include/__thread/support/pthread.h @@ -72,17 +72,17 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_init(__libcpp_recursiv return 0; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m) { return pthread_mutex_lock(__m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m) { return pthread_mutex_trylock(__m) == 0; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m) { return pthread_mutex_unlock(__m); } @@ -91,15 +91,15 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_destroy(__libcpp_recur return pthread_mutex_destroy(__m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m) { +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_lock(__libcpp_mutex_t* __m) { return pthread_mutex_lock(__m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) { +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) { return pthread_mutex_trylock(__m) == 0; } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) { +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_unlock(__libcpp_mutex_t* 
__m) { return pthread_mutex_unlock(__m); } @@ -117,12 +117,12 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_broadcast(__libcpp_condvar_t* return pthread_cond_broadcast(__cv); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m) { return pthread_cond_wait(__cv, __m); } -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, __libcpp_timespec_t* __ts) { return pthread_cond_timedwait(__cv, __m, __ts); } diff --git a/lib/libcxx/include/__thread/support/windows.h b/lib/libcxx/include/__thread/support/windows.h index 2921ed900e..558b5c81dc 100644 --- a/lib/libcxx/include/__thread/support/windows.h +++ b/lib/libcxx/include/__thread/support/windows.h @@ -36,22 +36,22 @@ typedef void* __libcpp_recursive_mutex_t[6]; _LIBCPP_EXPORTED_FROM_ABI int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m); _LIBCPP_EXPORTED_FROM_ABI int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int 
__libcpp_mutex_lock(__libcpp_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_mutex_unlock(__libcpp_mutex_t* __m); _LIBCPP_EXPORTED_FROM_ABI int __libcpp_mutex_destroy(__libcpp_mutex_t* __m); @@ -65,10 +65,10 @@ _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_signal(__libcpp_condvar_t* __cv); _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m); -_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, __libcpp_timespec_t* __ts); _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); diff --git a/lib/libcxx/include/__thread/thread.h b/lib/libcxx/include/__thread/thread.h index 1b51571ce3..b2f51aa816 100644 --- a/lib/libcxx/include/__thread/thread.h +++ b/lib/libcxx/include/__thread/thread.h @@ -25,6 +25,8 @@ #include <__thread/support.h> #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_constructible.h> #include <__type_traits/is_same.h> #include <__type_traits/remove_cvref.h> #include <__utility/forward.h> @@ -155,8 +157,8 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, __thread_id __id) { # ifndef _LIBCPP_CXX03_LANG template -inline _LIBCPP_HIDE_FROM_ABI void __thread_execute(tuple<_TSp, _Fp, _Args...>& __t, 
__tuple_indices<_Indices...>) { - std::__invoke(std::move(std::get<1>(__t)), std::move(std::get<_Indices>(__t))...); +inline _LIBCPP_HIDE_FROM_ABI void __thread_execute(tuple<_TSp, _Fp, _Args...>& __t, __index_sequence<_Indices...>) { + std::__invoke(std::move(std::get<_Indices + 1>(__t))...); } template @@ -164,8 +166,7 @@ _LIBCPP_HIDE_FROM_ABI void* __thread_proxy(void* __vp) { // _Fp = tuple< unique_ptr<__thread_struct>, Functor, Args...> unique_ptr<_Fp> __p(static_cast<_Fp*>(__vp)); __thread_local_data().set_pointer(std::get<0>(*__p.get()).release()); - typedef typename __make_tuple_indices::value, 2>::type _Index; - std::__thread_execute(*__p.get(), _Index()); + std::__thread_execute(*__p.get(), __make_index_sequence::value - 1>()); return nullptr; } @@ -206,6 +207,10 @@ public: # ifndef _LIBCPP_CXX03_LANG template , thread>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI explicit thread(_Fp&& __f, _Args&&... __args) { + static_assert(is_constructible<__decay_t<_Fp>, _Fp>::value, ""); + static_assert(_And, _Args>...>::value, ""); + static_assert(__is_invocable_v<__decay_t<_Fp>, __decay_t<_Args>...>, ""); + typedef unique_ptr<__thread_struct> _TSPtr; _TSPtr __tsp(new __thread_struct); typedef tuple<_TSPtr, __decay_t<_Fp>, __decay_t<_Args>...> _Gp; @@ -243,13 +248,13 @@ public: _LIBCPP_HIDE_FROM_ABI void swap(thread& __t) _NOEXCEPT { std::swap(__t_, __t.__t_); } - _LIBCPP_HIDE_FROM_ABI bool joinable() const _NOEXCEPT { return !__libcpp_thread_isnull(&__t_); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool joinable() const _NOEXCEPT { return !__libcpp_thread_isnull(&__t_); } void join(); void detach(); - _LIBCPP_HIDE_FROM_ABI id get_id() const _NOEXCEPT { return __libcpp_thread_get_id(&__t_); } - _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() _NOEXCEPT { return __t_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI id get_id() const _NOEXCEPT { return __libcpp_thread_get_id(&__t_); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() 
_NOEXCEPT { return __t_; } - static unsigned hardware_concurrency() _NOEXCEPT; + [[__nodiscard__]] static unsigned hardware_concurrency() _NOEXCEPT; }; inline _LIBCPP_HIDE_FROM_ABI void swap(thread& __x, thread& __y) _NOEXCEPT { __x.swap(__y); } diff --git a/lib/libcxx/include/__thread/timed_backoff_policy.h b/lib/libcxx/include/__thread/timed_backoff_policy.h index 35a72eb61f..01fe2dd045 100644 --- a/lib/libcxx/include/__thread/timed_backoff_policy.h +++ b/lib/libcxx/include/__thread/timed_backoff_policy.h @@ -11,6 +11,7 @@ #define _LIBCPP___THREAD_TIMED_BACKOFF_POLICY_H #include <__config> +#include <__thread/poll_with_backoff.h> #if _LIBCPP_HAS_THREADS @@ -24,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD struct __libcpp_timed_backoff_policy { - _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const { + _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const { if (__elapsed > chrono::milliseconds(128)) __libcpp_thread_sleep_for(chrono::milliseconds(8)); else if (__elapsed > chrono::microseconds(64)) @@ -33,7 +34,7 @@ struct __libcpp_timed_backoff_policy { __libcpp_thread_yield(); else { } // poll - return false; + return __backoff_results::__continue_poll; } }; diff --git a/lib/libcxx/include/__tree b/lib/libcxx/include/__tree index b3c0ece8e5..eb17f7d369 100644 --- a/lib/libcxx/include/__tree +++ b/lib/libcxx/include/__tree @@ -11,37 +11,39 @@ #define _LIBCPP___TREE #include <__algorithm/min.h> +#include <__algorithm/specialized_algorithms.h> #include <__assert> #include <__config> -#include <__fwd/map.h> #include <__fwd/pair.h> -#include <__fwd/set.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/next.h> #include <__memory/addressof.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> +#include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> -#include 
<__type_traits/can_extract_key.h> +#include <__new/launder.h> #include <__type_traits/copy_cvref.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> -#include <__type_traits/is_const.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_specialization.h> #include <__type_traits/is_swappable.h> +#include <__type_traits/make_transparent.h> #include <__type_traits/remove_const.h> -#include <__type_traits/remove_const_ref.h> #include <__type_traits/remove_cvref.h> #include <__utility/forward.h> +#include <__utility/lazy_synth_three_way_comparator.h> #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/swap.h> +#include <__utility/try_key_extraction.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -51,14 +53,31 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -_LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_DIAGNOSTIC_PUSH +// GCC complains about the backslashes at the end, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121528 +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wcomment") +// __tree is a red-black-tree implementation used for the associative containers (i.e. (multi)map/set). It stores +// - (1) a pointer to the node with the smallest (i.e. leftmost) element, namely __begin_node_ +// - (2) the number of nodes in the tree, namely __size_ +// - (3) a pointer to the root of the tree, namely __end_node_ +// +// Storing (1) and (2) is required to allow for constant time lookups. A tree looks like this in memory: +// +// __end_node_ +// | +// root +// / \ +// l1 r1 +// / \ / \ +// ... ... ... ... +// +// All nodes except __end_node_ have a __left_ and __right_ pointer as well as a __parent_ pointer. +// __end_node_ only contains a __left_ pointer, which points to the root of the tree. 
+// This layout allows for iteration through the tree without a need for special handling of the end node. See +// __tree_next_iter and __tree_prev_iter for more details. +_LIBCPP_DIAGNOSTIC_POP -template -class __tree; -template -class __tree_iterator; -template -class __tree_const_iterator; +_LIBCPP_BEGIN_NAMESPACE_STD template class __tree_end_node; @@ -70,13 +89,6 @@ class __tree_node; template struct __value_type; -template -class __map_node_destructor; -template -class __map_iterator; -template -class __map_const_iterator; - /* _NodePtr algorithms @@ -185,6 +197,11 @@ _LIBCPP_HIDE_FROM_ABI _NodePtr __tree_next(_NodePtr __x) _NOEXCEPT { return __x->__parent_unsafe(); } +// __tree_next_iter and __tree_prev_iter implement iteration through the tree. The order is as follows: +// left sub-tree -> node -> right sub-tree. When the right-most node of a sub-tree is reached, we walk up the tree until +// we find a node where we were in the left sub-tree. We are _always_ in a left sub-tree, since the __end_node_ points +// to the actual root of the tree through a __left_ pointer. Incrementing the end() pointer is UB, so we can assume that +// never happens. 
template inline _LIBCPP_HIDE_FROM_ABI _EndNodePtr __tree_next_iter(_NodePtr __x) _NOEXCEPT { _LIBCPP_ASSERT_INTERNAL(__x != nullptr, "node shouldn't be null"); @@ -494,16 +511,7 @@ _LIBCPP_HIDE_FROM_ABI void __tree_remove(_NodePtr __root, _NodePtr __z) _NOEXCEP // node traits template -struct __is_tree_value_type_imp : false_type {}; - -template -struct __is_tree_value_type_imp<__value_type<_Key, _Value> > : true_type {}; - -template -struct __is_tree_value_type : false_type {}; - -template -struct __is_tree_value_type<_One> : __is_tree_value_type_imp<__remove_cvref_t<_One> > {}; +inline const bool __is_tree_value_type_v = __is_specialization_v<_Tp, __value_type>; template struct __get_tree_key_type { @@ -549,15 +557,14 @@ private: template class __tree_end_node { public: - typedef _Pointer pointer; + using pointer = _Pointer; pointer __left_; _LIBCPP_HIDE_FROM_ABI __tree_end_node() _NOEXCEPT : __left_() {} }; template -class _LIBCPP_STANDALONE_DEBUG -__tree_node_base : public __tree_end_node<__rebind_pointer_t<_VoidPtr, __tree_node_base<_VoidPtr> > > { +class __tree_node_base : public __tree_end_node<__rebind_pointer_t<_VoidPtr, __tree_node_base<_VoidPtr> > > { public: using pointer = __rebind_pointer_t<_VoidPtr, __tree_node_base>; using __end_node_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<_VoidPtr, __tree_end_node >; @@ -570,20 +577,41 @@ public: _LIBCPP_HIDE_FROM_ABI void __set_parent(pointer __p) { __parent_ = static_cast<__end_node_pointer>(__p); } - ~__tree_node_base() = delete; + _LIBCPP_HIDE_FROM_ABI __tree_node_base() = default; __tree_node_base(__tree_node_base const&) = delete; __tree_node_base& operator=(__tree_node_base const&) = delete; }; template -class _LIBCPP_STANDALONE_DEBUG __tree_node : public __tree_node_base<_VoidPtr> { +class __tree_node : public __tree_node_base<_VoidPtr> { public: using __node_value_type _LIBCPP_NODEBUG = __get_node_value_type_t<_Tp>; - __node_value_type __value_; +// We use a union to avoid initialization during member 
initialization, which allows us +// to use the allocator from the container to construct the `__node_value_type` in the +// memory provided by the union member +#ifndef _LIBCPP_CXX03_LANG +private: + union { + __node_value_type __value_; + }; + +public: _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { return __value_; } +#else +private: + _ALIGNAS_TYPE(__node_value_type) unsigned char __buffer_[sizeof(__node_value_type)]; + +public: + _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { return *reinterpret_cast<__node_value_type*>(__buffer_); } +#endif + + template + _LIBCPP_HIDE_FROM_ABI explicit __tree_node(_Alloc& __na, _Args&&... __args) { + allocator_traits<_Alloc>::construct(__na, std::addressof(__get_value()), std::forward<_Args>(__args)...); + } ~__tree_node() = delete; __tree_node(__tree_node const&) = delete; __tree_node& operator=(__tree_node const&) = delete; @@ -591,11 +619,11 @@ public: template class __tree_node_destructor { - typedef _Allocator allocator_type; - typedef allocator_traits __alloc_traits; + using allocator_type = _Allocator; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits; public: - typedef typename __alloc_traits::pointer pointer; + using pointer = typename __alloc_traits::pointer; private: allocator_type& __na_; @@ -612,7 +640,7 @@ public: _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT { if (__value_constructed) - __alloc_traits::destroy(__na_, std::addressof(__p->__value_)); + __alloc_traits::destroy(__na_, std::addressof(__p->__get_value())); if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -630,12 +658,60 @@ struct __generic_container_node_destructor<__tree_node<_Tp, _VoidPtr>, _Alloc> : }; #endif +// Do an in-order traversal of the tree until `__break` returns true. Takes the root node of the tree. +template +#ifndef _LIBCPP_COMPILER_GCC // This function is recursive, so GCC complains about always_inline. 
+_LIBCPP_HIDE_FROM_ABI +#endif +bool __tree_iterate_from_root(_Break __break, _NodePtr __root, _Func& __func, _Proj& __proj) { + if (__root->__left_) { + if (std::__tree_iterate_from_root<_Reference>(__break, static_cast<_NodePtr>(__root->__left_), __func, __proj)) + return true; + } + if (__break(__root)) + return true; + std::__invoke(__func, std::__invoke(__proj, static_cast<_Reference>(__root->__get_value()))); + if (__root->__right_) + return std::__tree_iterate_from_root<_Reference>(__break, static_cast<_NodePtr>(__root->__right_), __func, __proj); + return false; +} + +// Do an in-order traversal of the tree from __first to __last. +template +_LIBCPP_HIDE_FROM_ABI void +__tree_iterate_subrange(_NodeIter __first_it, _NodeIter __last_it, _Func& __func, _Proj& __proj) { + using _NodePtr = typename _NodeIter::__node_pointer; + using _Reference = typename _NodeIter::reference; + + auto __first = __first_it.__ptr_; + auto __last = __last_it.__ptr_; + + while (true) { + if (__first == __last) + return; + const auto __nfirst = static_cast<_NodePtr>(__first); + std::__invoke(__func, std::__invoke(__proj, static_cast<_Reference>(__nfirst->__get_value()))); + if (__nfirst->__right_) { + if (std::__tree_iterate_from_root<_Reference>( + [&](_NodePtr __node) -> bool { return __node == __last; }, + static_cast<_NodePtr>(__nfirst->__right_), + __func, + __proj)) + return; + } + while (!std::__tree_is_left_child(static_cast<_NodePtr>(__first))) + __first = static_cast<_NodePtr>(__first)->__parent_; + __first = static_cast<_NodePtr>(__first)->__parent_; + } +} + template class __tree_iterator { - typedef __tree_node_types<_NodePtr> _NodeTypes; - typedef _NodePtr __node_pointer; - typedef typename _NodeTypes::__node_base_pointer __node_base_pointer; - typedef typename _NodeTypes::__end_node_pointer __end_node_pointer; + using _NodeTypes _LIBCPP_NODEBUG = __tree_node_types<_NodePtr>; + // NOLINTNEXTLINE(libcpp-nodebug-on-aliases) lldb relies on this alias for pretty printing + 
using __node_pointer = _NodePtr; + using __node_base_pointer _LIBCPP_NODEBUG = typename _NodeTypes::__node_base_pointer; + using __end_node_pointer _LIBCPP_NODEBUG = typename _NodeTypes::__end_node_pointer; __end_node_pointer __ptr_; @@ -646,15 +722,12 @@ public: using reference = value_type&; using pointer = __rebind_pointer_t<_NodePtr, value_type>; - _LIBCPP_HIDE_FROM_ABI __tree_iterator() _NOEXCEPT -#if _LIBCPP_STD_VER >= 14 - : __ptr_(nullptr) -#endif - { - } + _LIBCPP_HIDE_FROM_ABI __tree_iterator() _NOEXCEPT : __ptr_(nullptr) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__value_; } - _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(__get_np()->__value_); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__get_value(); } + _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + return pointer_traits::pointer_to(__get_np()->__get_value()); + } _LIBCPP_HIDE_FROM_ABI __tree_iterator& operator++() { __ptr_ = std::__tree_next_iter<__end_node_pointer>(static_cast<__node_base_pointer>(__ptr_)); @@ -691,50 +764,54 @@ private: friend class __tree; template friend class __tree_const_iterator; - template - friend class __map_iterator; - template - friend class map; - template - friend class multimap; - template - friend class set; - template - friend class multiset; + + template + friend void __tree_iterate_subrange(_NodeIter, _NodeIter, _Func&, _Proj&); }; +#ifndef _LIBCPP_CXX03_LANG +// This also handles {multi,}set::iterator, since they're just aliases to __tree::iterator +template +struct __specialized_algorithm< + _Algorithm::__for_each, + __iterator_pair<__tree_iterator<_Tp, _NodePtr, _DiffType>, __tree_iterator<_Tp, _NodePtr, _DiffType>>> { + static const bool __has_algorithm = true; + + using __iterator _LIBCPP_NODEBUG = __tree_iterator<_Tp, _NodePtr, _DiffType>; + + template + _LIBCPP_HIDE_FROM_ABI static void operator()(__iterator __first, __iterator __last, _Func& __func, 
_Proj& __proj) { + std::__tree_iterate_subrange(__first, __last, __func, __proj); + } +}; +#endif + template class __tree_const_iterator { - typedef __tree_node_types<_NodePtr> _NodeTypes; + using _NodeTypes _LIBCPP_NODEBUG = __tree_node_types<_NodePtr>; // NOLINTNEXTLINE(libcpp-nodebug-on-aliases) lldb relies on this alias for pretty printing - using __node_pointer = _NodePtr; - typedef typename _NodeTypes::__node_base_pointer __node_base_pointer; - typedef typename _NodeTypes::__end_node_pointer __end_node_pointer; + using __node_pointer = _NodePtr; + using __node_base_pointer _LIBCPP_NODEBUG = typename _NodeTypes::__node_base_pointer; + using __end_node_pointer _LIBCPP_NODEBUG = typename _NodeTypes::__end_node_pointer; __end_node_pointer __ptr_; public: - using iterator_category = bidirectional_iterator_tag; - using value_type = __get_node_value_type_t<_Tp>; - using difference_type = _DiffType; - using reference = const value_type&; - using pointer = __rebind_pointer_t<_NodePtr, const value_type>; + using iterator_category = bidirectional_iterator_tag; + using value_type = __get_node_value_type_t<_Tp>; + using difference_type = _DiffType; + using reference = const value_type&; + using pointer = __rebind_pointer_t<_NodePtr, const value_type>; + using __non_const_iterator _LIBCPP_NODEBUG = __tree_iterator<_Tp, __node_pointer, difference_type>; - _LIBCPP_HIDE_FROM_ABI __tree_const_iterator() _NOEXCEPT -#if _LIBCPP_STD_VER >= 14 - : __ptr_(nullptr) -#endif - { - } + _LIBCPP_HIDE_FROM_ABI __tree_const_iterator() _NOEXCEPT : __ptr_(nullptr) {} -private: - typedef __tree_iterator<_Tp, __node_pointer, difference_type> __non_const_iterator; - -public: _LIBCPP_HIDE_FROM_ABI __tree_const_iterator(__non_const_iterator __p) _NOEXCEPT : __ptr_(__p.__ptr_) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__value_; } - _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(__get_np()->__value_); } + _LIBCPP_HIDE_FROM_ABI 
reference operator*() const { return __get_np()->__get_value(); } + _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + return pointer_traits::pointer_to(__get_np()->__get_value()); + } _LIBCPP_HIDE_FROM_ABI __tree_const_iterator& operator++() { __ptr_ = std::__tree_next_iter<__end_node_pointer>(static_cast<__node_base_pointer>(__ptr_)); @@ -772,18 +849,28 @@ private: template friend class __tree; - template - friend class map; - template - friend class multimap; - template - friend class set; - template - friend class multiset; - template - friend class __map_const_iterator; + + template + friend void __tree_iterate_subrange(_NodeIter, _NodeIter, _Func&, _Proj&); }; +#ifndef _LIBCPP_CXX03_LANG +// This also handles {multi,}set::const_iterator, since they're just aliases to __tree::iterator +template +struct __specialized_algorithm< + _Algorithm::__for_each, + __iterator_pair<__tree_const_iterator<_Tp, _NodePtr, _DiffType>, __tree_const_iterator<_Tp, _NodePtr, _DiffType>>> { + static const bool __has_algorithm = true; + + using __iterator _LIBCPP_NODEBUG = __tree_const_iterator<_Tp, _NodePtr, _DiffType>; + + template + _LIBCPP_HIDE_FROM_ABI static void operator()(__iterator __first, __iterator __last, _Func& __func, _Proj& __proj) { + std::__tree_iterate_subrange(__first, __last, __func, __proj); + } +}; +#endif + template #ifndef _LIBCPP_CXX03_LANG _LIBCPP_DIAGNOSE_WARNING(!__is_invocable_v<_Compare const&, _Tp const&, _Tp const&>, @@ -794,21 +881,20 @@ int __diagnose_non_const_comparator(); template class __tree { public: - using value_type = __get_node_value_type_t<_Tp>; - typedef _Compare value_compare; - typedef _Allocator allocator_type; + using value_type = __get_node_value_type_t<_Tp>; + using value_compare = _Compare; + using allocator_type = _Allocator; private: - typedef allocator_traits __alloc_traits; - using key_type = __get_tree_key_type_t<_Tp>; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits; + using key_type = __get_tree_key_type_t<_Tp>; 
public: - typedef typename __alloc_traits::pointer pointer; - typedef typename __alloc_traits::const_pointer const_pointer; - typedef typename __alloc_traits::size_type size_type; - typedef typename __alloc_traits::difference_type difference_type; + using pointer = typename __alloc_traits::pointer; + using const_pointer = typename __alloc_traits::const_pointer; + using size_type = typename __alloc_traits::size_type; + using difference_type = typename __alloc_traits::difference_type; -public: using __void_pointer _LIBCPP_NODEBUG = typename __alloc_traits::void_pointer; using __node _LIBCPP_NODEBUG = __tree_node<_Tp, __void_pointer>; @@ -821,22 +907,8 @@ public: using __end_node_t _LIBCPP_NODEBUG = __tree_end_node<__node_base_pointer>; using __end_node_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<__void_pointer, __end_node_t>; - using __parent_pointer _LIBCPP_NODEBUG = __end_node_pointer; // TODO: Remove this once the uses in are removed - - typedef __rebind_alloc<__alloc_traits, __node> __node_allocator; - typedef allocator_traits<__node_allocator> __node_traits; - -// TODO(LLVM 22): Remove this check -#ifndef _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB - static_assert(sizeof(__node_base_pointer) == sizeof(__end_node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) == - _LIBCPP_ALIGNOF(__end_node_pointer), - "It looks like you are using std::__tree (an implementation detail for (multi)map/set) with a fancy " - "pointer type that thas a different representation depending on whether it points to a __tree base " - "pointer or a __tree node pointer (both of which are implementation details of the standard library). " - "This means that your ABI is being broken between LLVM 19 and LLVM 20. 
If you don't care about your " - "ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to silence this " - "diagnostic."); -#endif + using __node_allocator _LIBCPP_NODEBUG = __rebind_alloc<__alloc_traits, __node>; + using __node_traits _LIBCPP_NODEBUG = allocator_traits<__node_allocator>; private: // check for sane allocator pointer rebinding semantics. Rebinding the @@ -844,8 +916,8 @@ private: // the pointer using 'pointer_traits'. static_assert(is_same<__node_pointer, typename __node_traits::pointer>::value, "Allocator does not rebind pointers in a sane manner."); - typedef __rebind_alloc<__node_traits, __node_base> __node_base_allocator; - typedef allocator_traits<__node_base_allocator> __node_base_traits; + using __node_base_allocator _LIBCPP_NODEBUG = __rebind_alloc<__node_traits, __node_base>; + using __node_base_traits _LIBCPP_NODEBUG = allocator_traits<__node_base_allocator>; static_assert(is_same<__node_base_pointer, typename __node_base_traits::pointer>::value, "Allocator does not rebind pointers in a sane manner."); @@ -865,17 +937,11 @@ public: private: _LIBCPP_HIDE_FROM_ABI const __node_allocator& __node_alloc() const _NOEXCEPT { return __node_alloc_; } - _LIBCPP_HIDE_FROM_ABI __end_node_pointer& __begin_node() _NOEXCEPT { return __begin_node_; } - _LIBCPP_HIDE_FROM_ABI const __end_node_pointer& __begin_node() const _NOEXCEPT { return __begin_node_; } public: _LIBCPP_HIDE_FROM_ABI allocator_type __alloc() const _NOEXCEPT { return allocator_type(__node_alloc()); } -private: - _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __size_; } - -public: - _LIBCPP_HIDE_FROM_ABI const size_type& size() const _NOEXCEPT { return __size_; } + _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } _LIBCPP_HIDE_FROM_ABI value_compare& value_comp() _NOEXCEPT { return __value_comp_; } _LIBCPP_HIDE_FROM_ABI const value_compare& value_comp() const _NOEXCEPT { return __value_comp_; } @@ -888,32 +954,61 @@ public: return 
std::addressof(__end_node()->__left_); } - typedef __tree_iterator<_Tp, __node_pointer, difference_type> iterator; - typedef __tree_const_iterator<_Tp, __node_pointer, difference_type> const_iterator; + using iterator = __tree_iterator<_Tp, __node_pointer, difference_type>; + using const_iterator = __tree_const_iterator<_Tp, __node_pointer, difference_type>; _LIBCPP_HIDE_FROM_ABI explicit __tree(const value_compare& __comp) _NOEXCEPT_( - is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_copy_constructible::value); - _LIBCPP_HIDE_FROM_ABI explicit __tree(const allocator_type& __a); - _LIBCPP_HIDE_FROM_ABI __tree(const value_compare& __comp, const allocator_type& __a); + is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_copy_constructible::value) + : __size_(0), __value_comp_(__comp) { + __begin_node_ = __end_node(); + } + + _LIBCPP_HIDE_FROM_ABI explicit __tree(const allocator_type& __a) + : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0) { + __begin_node_ = __end_node(); + } + + _LIBCPP_HIDE_FROM_ABI __tree(const value_compare& __comp, const allocator_type& __a) + : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0), __value_comp_(__comp) { + __begin_node_ = __end_node(); + } + _LIBCPP_HIDE_FROM_ABI __tree(const __tree& __t); + + _LIBCPP_HIDE_FROM_ABI __tree(const __tree& __other, const allocator_type& __alloc) + : __begin_node_(__end_node()), __node_alloc_(__alloc), __size_(0), __value_comp_(__other.value_comp()) { + if (__other.size() == 0) + return; + + *__root_ptr() = static_cast<__node_base_pointer>(__copy_construct_tree(__other.__root())); + __root()->__parent_ = __end_node(); + __begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)); + __size_ = __other.size(); + } + _LIBCPP_HIDE_FROM_ABI __tree& operator=(const __tree& __t); template _LIBCPP_HIDE_FROM_ABI void __assign_unique(_ForwardIterator __first, _ForwardIterator __last); - template - 
_LIBCPP_HIDE_FROM_ABI void __assign_multi(_InputIterator __first, _InputIterator __last); _LIBCPP_HIDE_FROM_ABI __tree(__tree&& __t) _NOEXCEPT_( is_nothrow_move_constructible<__node_allocator>::value&& is_nothrow_move_constructible::value); _LIBCPP_HIDE_FROM_ABI __tree(__tree&& __t, const allocator_type& __a); + _LIBCPP_HIDE_FROM_ABI __tree& operator=(__tree&& __t) _NOEXCEPT_(is_nothrow_move_assignable::value && ((__node_traits::propagate_on_container_move_assignment::value && is_nothrow_move_assignable<__node_allocator>::value) || - allocator_traits<__node_allocator>::is_always_equal::value)); + allocator_traits<__node_allocator>::is_always_equal::value)) { + __move_assign(__t, integral_constant()); + return *this; + } - _LIBCPP_HIDE_FROM_ABI ~__tree(); + _LIBCPP_HIDE_FROM_ABI ~__tree() { + static_assert(is_copy_constructible::value, "Comparator must be copy-constructible."); + destroy(__root()); + } - _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__begin_node()); } - _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return const_iterator(__begin_node()); } + _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__begin_node_); } + _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return const_iterator(__begin_node_); } _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return iterator(__end_node()); } _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return const_iterator(__end_node()); } @@ -931,116 +1026,151 @@ public: _NOEXCEPT_(__is_nothrow_swappable_v); #endif - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_key_args(_Key const&, _Args&&... __args); - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_hint_unique_key_args(const_iterator, _Key const&, _Args&&...); - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_impl(_Args&&... __args); - - template - _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique_impl(const_iterator __p, _Args&&... 
__args); - template _LIBCPP_HIDE_FROM_ABI iterator __emplace_multi(_Args&&... __args); template _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_multi(const_iterator __p, _Args&&... __args); - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique(_Pp&& __x) { - return __emplace_unique_extract_key(std::forward<_Pp>(__x), __can_extract_key<_Pp, key_type>()); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique(_First&& __f, _Second&& __s) { - return __emplace_unique_key_args(__f, std::forward<_First>(__f), std::forward<_Second>(__s)); - } - template _LIBCPP_HIDE_FROM_ABI pair __emplace_unique(_Args&&... __args) { - return __emplace_unique_impl(std::forward<_Args>(__args)...); - } - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_extract_key(_Pp&& __x, __extract_key_fail_tag) { - return __emplace_unique_impl(std::forward<_Pp>(__x)); - } - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_extract_key(_Pp&& __x, __extract_key_self_tag) { - return __emplace_unique_key_args(__x, std::forward<_Pp>(__x)); - } - - template - _LIBCPP_HIDE_FROM_ABI pair __emplace_unique_extract_key(_Pp&& __x, __extract_key_first_tag) { - return __emplace_unique_key_args(__x.first, std::forward<_Pp>(__x)); - } - - template - _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique(const_iterator __p, _Pp&& __x) { - return __emplace_hint_unique_extract_key(__p, std::forward<_Pp>(__x), __can_extract_key<_Pp, key_type>()); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique(const_iterator __p, _First&& __f, _Second&& __s) { - return __emplace_hint_unique_key_args(__p, __f, std::forward<_First>(__f), std::forward<_Second>(__s)).first; + return std::__try_key_extraction( + [this](const key_type& __key, _Args&&... 
__args2) { + auto [__parent, __child] = __find_equal(__key); + __node_pointer __r = static_cast<__node_pointer>(__child); + bool __inserted = false; + if (__child == nullptr) { + __node_holder __h = __construct_node(std::forward<_Args>(__args2)...); + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); + __r = __h.release(); + __inserted = true; + } + return pair(iterator(__r), __inserted); + }, + [this](_Args&&... __args2) { + __node_holder __h = __construct_node(std::forward<_Args>(__args2)...); + auto [__parent, __child] = __find_equal(__h->__get_value()); + __node_pointer __r = static_cast<__node_pointer>(__child); + bool __inserted = false; + if (__child == nullptr) { + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); + __r = __h.release(); + __inserted = true; + } + return pair(iterator(__r), __inserted); + }, + std::forward<_Args>(__args)...); } template - _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique(const_iterator __p, _Args&&... __args) { - return __emplace_hint_unique_impl(__p, std::forward<_Args>(__args)...); + _LIBCPP_HIDE_FROM_ABI pair __emplace_hint_unique(const_iterator __p, _Args&&... __args) { + return std::__try_key_extraction( + [this, __p](const key_type& __key, _Args&&... __args2) { + __node_base_pointer __dummy; + auto [__parent, __child] = __find_equal(__p, __dummy, __key); + __node_pointer __r = static_cast<__node_pointer>(__child); + bool __inserted = false; + if (__child == nullptr) { + __node_holder __h = __construct_node(std::forward<_Args>(__args2)...); + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); + __r = __h.release(); + __inserted = true; + } + return pair(iterator(__r), __inserted); + }, + [this, __p](_Args&&... 
__args2) { + __node_holder __h = __construct_node(std::forward<_Args>(__args2)...); + __node_base_pointer __dummy; + auto [__parent, __child] = __find_equal(__p, __dummy, __h->__get_value()); + __node_pointer __r = static_cast<__node_pointer>(__child); + if (__child == nullptr) { + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); + __r = __h.release(); + } + return pair(iterator(__r), __child == nullptr); + }, + std::forward<_Args>(__args)...); } - template - _LIBCPP_HIDE_FROM_ABI iterator - __emplace_hint_unique_extract_key(const_iterator __p, _Pp&& __x, __extract_key_fail_tag) { - return __emplace_hint_unique_impl(__p, std::forward<_Pp>(__x)); + template + _LIBCPP_HIDE_FROM_ABI void __insert_range_multi(_InIter __first, _Sent __last) { + if (__first == __last) + return; + + if (__root() == nullptr) { // Make sure we always have a root node + __insert_node_at( + __end_node(), __end_node()->__left_, static_cast<__node_base_pointer>(__construct_node(*__first).release())); + ++__first; + } + + auto __max_node = static_cast<__node_pointer>(std::__tree_max(static_cast<__node_base_pointer>(__root()))); + + for (; __first != __last; ++__first) { + __node_holder __nd = __construct_node(*__first); + // Always check the max node first. This optimizes for sorted ranges inserted at the end. 
+ if (!value_comp()(__nd->__get_value(), __max_node->__get_value())) { // __node >= __max_val + __insert_node_at(static_cast<__end_node_pointer>(__max_node), + __max_node->__right_, + static_cast<__node_base_pointer>(__nd.get())); + __max_node = __nd.release(); + } else { + __end_node_pointer __parent; + __node_base_pointer& __child = __find_leaf_high(__parent, __nd->__get_value()); + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd.release())); + } + } } - template - _LIBCPP_HIDE_FROM_ABI iterator - __emplace_hint_unique_extract_key(const_iterator __p, _Pp&& __x, __extract_key_self_tag) { - return __emplace_hint_unique_key_args(__p, __x, std::forward<_Pp>(__x)).first; + template + _LIBCPP_HIDE_FROM_ABI void __insert_range_unique(_InIter __first, _Sent __last) { + if (__first == __last) + return; + + if (__root() == nullptr) { + __insert_node_at( + __end_node(), __end_node()->__left_, static_cast<__node_base_pointer>(__construct_node(*__first).release())); + ++__first; + } + + auto __max_node = static_cast<__node_pointer>(std::__tree_max(static_cast<__node_base_pointer>(__root()))); + + using __reference = decltype(*__first); + + for (; __first != __last; ++__first) { + std::__try_key_extraction( + [this, &__max_node](const key_type& __key, __reference&& __val) { + if (value_comp()(__max_node->__get_value(), __key)) { // __key > __max_node + __node_holder __nd = __construct_node(std::forward<__reference>(__val)); + __insert_node_at(static_cast<__end_node_pointer>(__max_node), + __max_node->__right_, + static_cast<__node_base_pointer>(__nd.get())); + __max_node = __nd.release(); + } else { + auto [__parent, __child] = __find_equal(__key); + if (__child == nullptr) { + __node_holder __nd = __construct_node(std::forward<__reference>(__val)); + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd.release())); + } + } + }, + [this, &__max_node](__reference&& __val) { + __node_holder __nd = 
__construct_node(std::forward<__reference>(__val)); + if (value_comp()(__max_node->__get_value(), __nd->__get_value())) { // __node > __max_node + __insert_node_at(static_cast<__end_node_pointer>(__max_node), + __max_node->__right_, + static_cast<__node_base_pointer>(__nd.get())); + __max_node = __nd.release(); + } else { + auto [__parent, __child] = __find_equal(__nd->__get_value()); + if (__child == nullptr) { + __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd.release())); + } + } + }, + *__first); + } } - template - _LIBCPP_HIDE_FROM_ABI iterator - __emplace_hint_unique_extract_key(const_iterator __p, _Pp&& __x, __extract_key_first_tag) { - return __emplace_hint_unique_key_args(__p, __x.first, std::forward<_Pp>(__x)).first; - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void - __insert_unique_from_orphaned_node(const_iterator __p, __get_node_value_type_t<_Tp>&& __value) { - __emplace_hint_unique(__p, const_cast(__value.first), std::move(__value.second)); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(const_iterator __p, _Tp&& __value) { - __emplace_hint_unique(__p, std::move(__value)); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(const_iterator __p, value_type&& __value) { - __emplace_hint_multi(__p, const_cast(__value.first), std::move(__value.second)); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(const_iterator __p, _Tp&& __value) { - __emplace_hint_multi(__p, std::move(__value)); - } - - _LIBCPP_HIDE_FROM_ABI pair __node_assign_unique(const value_type& __v, __node_pointer __dest); - - _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(__node_pointer __nd); - _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(const_iterator __p, __node_pointer __nd); - _LIBCPP_HIDE_FROM_ABI iterator __remove_node_pointer(__node_pointer) _NOEXCEPT; #if _LIBCPP_STD_VER >= 17 @@ 
-1048,15 +1178,15 @@ public: _LIBCPP_HIDE_FROM_ABI _InsertReturnType __node_handle_insert_unique(_NodeHandle&&); template _LIBCPP_HIDE_FROM_ABI iterator __node_handle_insert_unique(const_iterator, _NodeHandle&&); - template - _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_unique(_Tree& __source); + template + _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_unique(__tree<_Tp, _Comp2, _Allocator>& __source); template _LIBCPP_HIDE_FROM_ABI iterator __node_handle_insert_multi(_NodeHandle&&); template _LIBCPP_HIDE_FROM_ABI iterator __node_handle_insert_multi(const_iterator, _NodeHandle&&); - template - _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_multi(_Tree& __source); + template + _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_multi(__tree<_Tp, _Comp2, _Allocator>& __source); template _LIBCPP_HIDE_FROM_ABI _NodeHandle __node_handle_extract(key_type const&); @@ -1075,41 +1205,157 @@ public: __insert_node_at(__end_node_pointer __parent, __node_base_pointer& __child, __node_base_pointer __new_node) _NOEXCEPT; template - _LIBCPP_HIDE_FROM_ABI iterator find(const _Key& __v); + _LIBCPP_HIDE_FROM_ABI iterator find(const _Key& __key) { + auto [__, __match] = __find_equal(__key); + if (__match == nullptr) + return end(); + return iterator(static_cast<__node_pointer>(__match)); + } + template - _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Key& __v) const; + _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Key& __key) const { + auto [__, __match] = __find_equal(__key); + if (__match == nullptr) + return end(); + return const_iterator(static_cast<__node_pointer>(__match)); + } template _LIBCPP_HIDE_FROM_ABI size_type __count_unique(const _Key& __k) const; template _LIBCPP_HIDE_FROM_ABI size_type __count_multi(const _Key& __k) const; - template - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Key& __v) { - return __lower_bound(__v, __root(), __end_node()); + template + _LIBCPP_HIDE_FROM_ABI __end_node_pointer __lower_upper_bound_unique_impl(const _Key& __v) const { + auto 
__rt = __root(); + auto __result = __end_node(); + auto __comp = __lazy_synth_three_way_comparator<_Compare, _Key, value_type>(value_comp()); + while (__rt != nullptr) { + auto __comp_res = __comp(__v, __rt->__get_value()); + + if (__comp_res.__less()) { + __result = static_cast<__end_node_pointer>(__rt); + __rt = static_cast<__node_pointer>(__rt->__left_); + } else if (__comp_res.__greater()) { + __rt = static_cast<__node_pointer>(__rt->__right_); + } else if _LIBCPP_CONSTEXPR (_LowerBound) { + return static_cast<__end_node_pointer>(__rt); + } else { + return __rt->__right_ ? static_cast<__end_node_pointer>(std::__tree_min(__rt->__right_)) : __result; + } + } + return __result; } + + // Compatibility escape hatch for comparators that are not strict weak orderings. This + // can be removed for the LLVM 23 release. +#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND) template - _LIBCPP_HIDE_FROM_ABI iterator __lower_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result); - template - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Key& __v) const { - return __lower_bound(__v, __root(), __end_node()); + _LIBCPP_HIDE_FROM_ABI __end_node_pointer __lower_bound_unique_compat_impl(const _Key& __v) const { + auto __rt = __root(); + auto __result = __end_node(); + while (__rt != nullptr) { + if (!value_comp()(__rt->__get_value(), __v)) { + __result = std::__static_fancy_pointer_cast<__end_node_pointer>(__rt); + __rt = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__left_); + } else { + __rt = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__right_); + } + } + return __result; } + + template + _LIBCPP_HIDE_FROM_ABI __end_node_pointer __upper_bound_unique_compat_impl(const _Key& __v) const { + auto __rt = __root(); + auto __result = __end_node(); + while (__rt != nullptr) { + if (value_comp()(__v, __rt->__get_value())) { + __result = std::__static_fancy_pointer_cast<__end_node_pointer>(__rt); + __rt = 
std::__static_fancy_pointer_cast<__node_pointer>(__rt->__left_); + } else { + __rt = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__right_); + } + } + return __result; + } +#endif // _LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND + + template + _LIBCPP_HIDE_FROM_ABI iterator __lower_bound_unique(const _Key& __v) { +#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND) + return iterator(__lower_bound_unique_compat_impl(__v)); +#else + return iterator(__lower_upper_bound_unique_impl(__v)); +#endif + } + + template + _LIBCPP_HIDE_FROM_ABI const_iterator __lower_bound_unique(const _Key& __v) const { +#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND) + return const_iterator(__lower_bound_unique_compat_impl(__v)); +#else + return const_iterator(__lower_upper_bound_unique_impl(__v)); +#endif + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __upper_bound_unique(const _Key& __v) { +#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND) + return iterator(__upper_bound_unique_compat_impl(__v)); +#else + return iterator(__lower_upper_bound_unique_impl(__v)); +#endif + } + + template + _LIBCPP_HIDE_FROM_ABI const_iterator __upper_bound_unique(const _Key& __v) const { +#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND) + return iterator(__upper_bound_unique_compat_impl(__v)); +#else + return iterator(__lower_upper_bound_unique_impl(__v)); +#endif + } + +private: + template + _LIBCPP_HIDE_FROM_ABI iterator + __lower_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result); + template _LIBCPP_HIDE_FROM_ABI const_iterator - __lower_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const; + __lower_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const; + +public: template - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Key& __v) { - return __upper_bound(__v, __root(), __end_node()); + _LIBCPP_HIDE_FROM_ABI iterator __lower_bound_multi(const _Key& __v) { + return 
__lower_bound_multi(__v, __root(), __end_node()); } template - _LIBCPP_HIDE_FROM_ABI iterator __upper_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result); - template - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Key& __v) const { - return __upper_bound(__v, __root(), __end_node()); + _LIBCPP_HIDE_FROM_ABI const_iterator __lower_bound_multi(const _Key& __v) const { + return __lower_bound_multi(__v, __root(), __end_node()); } + + template + _LIBCPP_HIDE_FROM_ABI iterator __upper_bound_multi(const _Key& __v) { + return __upper_bound_multi(__v, __root(), __end_node()); + } + + template + _LIBCPP_HIDE_FROM_ABI const_iterator __upper_bound_multi(const _Key& __v) const { + return __upper_bound_multi(__v, __root(), __end_node()); + } + +private: + template + _LIBCPP_HIDE_FROM_ABI iterator + __upper_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result); + template _LIBCPP_HIDE_FROM_ABI const_iterator - __upper_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const; + __upper_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const; + +public: template _LIBCPP_HIDE_FROM_ABI pair __equal_range_unique(const _Key& __k); template @@ -1120,22 +1366,24 @@ public: template _LIBCPP_HIDE_FROM_ABI pair __equal_range_multi(const _Key& __k) const; - typedef __tree_node_destructor<__node_allocator> _Dp; - typedef unique_ptr<__node, _Dp> __node_holder; + using _Dp _LIBCPP_NODEBUG = __tree_node_destructor<__node_allocator>; + using __node_holder _LIBCPP_NODEBUG = unique_ptr<__node, _Dp>; _LIBCPP_HIDE_FROM_ABI __node_holder remove(const_iterator __p) _NOEXCEPT; // FIXME: Make this function const qualified. Unfortunately doing so // breaks existing code which uses non-const callable comparators. 
template - _LIBCPP_HIDE_FROM_ABI __node_base_pointer& __find_equal(__end_node_pointer& __parent, const _Key& __v); + _LIBCPP_HIDE_FROM_ABI pair<__end_node_pointer, __node_base_pointer&> __find_equal(const _Key& __v); + template - _LIBCPP_HIDE_FROM_ABI __node_base_pointer& __find_equal(__end_node_pointer& __parent, const _Key& __v) const { - return const_cast<__tree*>(this)->__find_equal(__parent, __v); + _LIBCPP_HIDE_FROM_ABI pair<__end_node_pointer, __node_base_pointer&> __find_equal(const _Key& __v) const { + return const_cast<__tree*>(this)->__find_equal(__v); } + template - _LIBCPP_HIDE_FROM_ABI __node_base_pointer& - __find_equal(const_iterator __hint, __end_node_pointer& __parent, __node_base_pointer& __dummy, const _Key& __v); + _LIBCPP_HIDE_FROM_ABI pair<__end_node_pointer, __node_base_pointer&> + __find_equal(const_iterator __hint, __node_base_pointer& __dummy, const _Key& __v); _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __tree& __t) { __copy_assign_alloc(__t, integral_constant()); @@ -1160,7 +1408,7 @@ private: _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... 
__args); // TODO: Make this _LIBCPP_HIDE_FROM_ABI - _LIBCPP_HIDDEN void destroy(__node_pointer __nd) _NOEXCEPT; + _LIBCPP_HIDDEN void destroy(__node_pointer __nd) _NOEXCEPT { (__tree_deleter(__node_alloc_))(__nd); } _LIBCPP_HIDE_FROM_ABI void __move_assign(__tree& __t, false_type); _LIBCPP_HIDE_FROM_ABI void __move_assign(__tree& __t, true_type) _NOEXCEPT_( @@ -1178,7 +1426,7 @@ private: } _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__tree&, false_type) _NOEXCEPT {} - template ::value, int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI static void __assign_value(__get_node_value_type_t& __lhs, _From&& __rhs) { using __key_type = __remove_const_t; @@ -1188,166 +1436,203 @@ private: __lhs.second = std::forward<_From>(__rhs).second; } - template ::value, int> = 0> + template , int> = 0> _LIBCPP_HIDE_FROM_ABI static void __assign_value(_To& __lhs, _From&& __rhs) { __lhs = std::forward<_From>(__rhs); } - struct _DetachedTreeCache { - _LIBCPP_HIDE_FROM_ABI explicit _DetachedTreeCache(__tree* __t) _NOEXCEPT - : __t_(__t), - __cache_root_(__detach_from_tree(__t)) { - __advance(); + class __tree_deleter { + __node_allocator& __alloc_; + + public: + using pointer = __node_pointer; + + _LIBCPP_HIDE_FROM_ABI __tree_deleter(__node_allocator& __alloc) : __alloc_(__alloc) {} + +#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function + _LIBCPP_HIDE_FROM_ABI +#endif + void + operator()(__node_pointer __ptr) { + if (!__ptr) + return; + + (*this)(static_cast<__node_pointer>(__ptr->__left_)); + + auto __right = __ptr->__right_; + + __node_traits::destroy(__alloc_, std::addressof(__ptr->__get_value())); + __node_traits::deallocate(__alloc_, __ptr, 1); + + (*this)(static_cast<__node_pointer>(__right)); } - - _LIBCPP_HIDE_FROM_ABI __node_pointer __get() const _NOEXCEPT { return __cache_elem_; } - - _LIBCPP_HIDE_FROM_ABI void __advance() _NOEXCEPT { - __cache_elem_ = __cache_root_; - if (__cache_root_) { - __cache_root_ 
= __detach_next(__cache_root_); - } - } - - _LIBCPP_HIDE_FROM_ABI ~_DetachedTreeCache() { - __t_->destroy(__cache_elem_); - if (__cache_root_) { - while (__cache_root_->__parent_ != nullptr) - __cache_root_ = static_cast<__node_pointer>(__cache_root_->__parent_); - __t_->destroy(__cache_root_); - } - } - - _DetachedTreeCache(_DetachedTreeCache const&) = delete; - _DetachedTreeCache& operator=(_DetachedTreeCache const&) = delete; - - private: - _LIBCPP_HIDE_FROM_ABI static __node_pointer __detach_from_tree(__tree* __t) _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI static __node_pointer __detach_next(__node_pointer) _NOEXCEPT; - - __tree* __t_; - __node_pointer __cache_root_; - __node_pointer __cache_elem_; }; + + // This copy construction will always produce a correct red-black-tree assuming the incoming tree is correct, since we + // copy the exact structure 1:1. Since this is for copy construction _only_ we know that we get a correct tree. If we + // didn't get a correct tree, the invariants of __tree are broken and we have a much bigger problem than an improperly + // balanced tree. 
+ template +#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function + _LIBCPP_HIDE_FROM_ABI +#endif + __node_pointer __construct_from_tree(__node_pointer __src, _NodeConstructor __construct) { + if (!__src) + return nullptr; + + __node_holder __new_node = __construct(__src->__get_value()); + + unique_ptr<__node, __tree_deleter> __left( + __construct_from_tree(static_cast<__node_pointer>(__src->__left_), __construct), __node_alloc_); + __node_pointer __right = __construct_from_tree(static_cast<__node_pointer>(__src->__right_), __construct); + + __node_pointer __new_node_ptr = __new_node.release(); + + __new_node_ptr->__is_black_ = __src->__is_black_; + __new_node_ptr->__left_ = static_cast<__node_base_pointer>(__left.release()); + __new_node_ptr->__right_ = static_cast<__node_base_pointer>(__right); + if (__new_node_ptr->__left_) + __new_node_ptr->__left_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr); + if (__new_node_ptr->__right_) + __new_node_ptr->__right_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr); + return __new_node_ptr; + } + + _LIBCPP_HIDE_FROM_ABI __node_pointer __copy_construct_tree(__node_pointer __src) { + return __construct_from_tree(__src, [this](const value_type& __val) { return __construct_node(__val); }); + } + + template , int> = 0> + _LIBCPP_HIDE_FROM_ABI __node_pointer __move_construct_tree(__node_pointer __src) { + return __construct_from_tree(__src, [this](value_type& __val) { + return __construct_node(const_cast(__val.first), std::move(__val.second)); + }); + } + + template , int> = 0> + _LIBCPP_HIDE_FROM_ABI __node_pointer __move_construct_tree(__node_pointer __src) { + return __construct_from_tree(__src, [this](value_type& __val) { return __construct_node(std::move(__val)); }); + } + + template + // This copy assignment will always produce a correct red-black-tree assuming the incoming tree is correct, since our + // own tree is a red-black-tree 
and the incoming tree is a red-black-tree. The invariants of a red-black-tree are + // temporarily not met until all of the incoming red-black tree is copied. +#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function + _LIBCPP_HIDE_FROM_ABI +#endif + __node_pointer __assign_from_tree( + __node_pointer __dest, __node_pointer __src, _Assignment __assign, _ConstructionAlg __construct_subtree) { + if (!__src) { + destroy(__dest); + return nullptr; + } + + __assign(__dest->__get_value(), __src->__get_value()); + __dest->__is_black_ = __src->__is_black_; + + // If we already have a left node in the destination tree, reuse it and copy-assign recursively + if (__dest->__left_) { + __dest->__left_ = static_cast<__node_base_pointer>(__assign_from_tree( + static_cast<__node_pointer>(__dest->__left_), + static_cast<__node_pointer>(__src->__left_), + __assign, + __construct_subtree)); + + // Otherwise, we must create new nodes; copy-construct from here on + } else if (__src->__left_) { + auto __new_left = __construct_subtree(static_cast<__node_pointer>(__src->__left_)); + __dest->__left_ = static_cast<__node_base_pointer>(__new_left); + __new_left->__parent_ = static_cast<__end_node_pointer>(__dest); + } + + // Identical to the left case above, just for the right nodes + if (__dest->__right_) { + __dest->__right_ = static_cast<__node_base_pointer>(__assign_from_tree( + static_cast<__node_pointer>(__dest->__right_), + static_cast<__node_pointer>(__src->__right_), + __assign, + __construct_subtree)); + } else if (__src->__right_) { + auto __new_right = __construct_subtree(static_cast<__node_pointer>(__src->__right_)); + __dest->__right_ = static_cast<__node_base_pointer>(__new_right); + __new_right->__parent_ = static_cast<__end_node_pointer>(__dest); + } + + return __dest; + } + + _LIBCPP_HIDE_FROM_ABI __node_pointer __copy_assign_tree(__node_pointer __dest, __node_pointer __src) { + return __assign_from_tree( + 
__dest, + __src, + [](value_type& __lhs, const value_type& __rhs) { __assign_value(__lhs, __rhs); }, + [this](__node_pointer __nd) { return __copy_construct_tree(__nd); }); + } + + _LIBCPP_HIDE_FROM_ABI __node_pointer __move_assign_tree(__node_pointer __dest, __node_pointer __src) { + return __assign_from_tree( + __dest, + __src, + [](value_type& __lhs, value_type& __rhs) { __assign_value(__lhs, std::move(__rhs)); }, + [this](__node_pointer __nd) { return __move_construct_tree(__nd); }); + } + + friend struct __specialized_algorithm<_Algorithm::__for_each, __single_range<__tree> >; }; +#if _LIBCPP_STD_VER >= 14 template -__tree<_Tp, _Compare, _Allocator>::__tree(const value_compare& __comp) _NOEXCEPT_( - is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_copy_constructible::value) - : __size_(0), __value_comp_(__comp) { - __begin_node() = __end_node(); -} +struct __specialized_algorithm<_Algorithm::__for_each, __single_range<__tree<_Tp, _Compare, _Allocator> > > { + static const bool __has_algorithm = true; -template -__tree<_Tp, _Compare, _Allocator>::__tree(const allocator_type& __a) - : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0) { - __begin_node() = __end_node(); -} + using __node_pointer _LIBCPP_NODEBUG = typename __tree<_Tp, _Compare, _Allocator>::__node_pointer; -template -__tree<_Tp, _Compare, _Allocator>::__tree(const value_compare& __comp, const allocator_type& __a) - : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0), __value_comp_(__comp) { - __begin_node() = __end_node(); -} - -// Precondition: size() != 0 -template -typename __tree<_Tp, _Compare, _Allocator>::__node_pointer -__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_from_tree(__tree* __t) _NOEXCEPT { - __node_pointer __cache = static_cast<__node_pointer>(__t->__begin_node()); - __t->__begin_node() = __t->__end_node(); - __t->__end_node()->__left_->__parent_ = nullptr; - __t->__end_node()->__left_ = nullptr; - __t->size() 
= 0; - // __cache->__left_ == nullptr - if (__cache->__right_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__right_); - // __cache->__left_ == nullptr - // __cache->__right_ == nullptr - return __cache; -} - -// Precondition: __cache != nullptr -// __cache->left_ == nullptr -// __cache->right_ == nullptr -// This is no longer a red-black tree -template -typename __tree<_Tp, _Compare, _Allocator>::__node_pointer -__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_pointer __cache) _NOEXCEPT { - if (__cache->__parent_ == nullptr) - return nullptr; - if (std::__tree_is_left_child(static_cast<__node_base_pointer>(__cache))) { - __cache->__parent_->__left_ = nullptr; - __cache = static_cast<__node_pointer>(__cache->__parent_); - if (__cache->__right_ == nullptr) - return __cache; - return static_cast<__node_pointer>(std::__tree_leaf(__cache->__right_)); + template + _LIBCPP_HIDE_FROM_ABI static auto operator()(_Tree&& __range, _Func __func, _Proj __proj) { + if (__range.size() != 0) + std::__tree_iterate_from_root<__copy_cvref_t<_Tree, typename __remove_cvref_t<_Tree>::value_type>>( + [](__node_pointer) { return false; }, __range.__root(), __func, __proj); + return std::make_pair(__range.end(), std::move(__func)); } - // __cache is right child - __cache->__parent_unsafe()->__right_ = nullptr; - __cache = static_cast<__node_pointer>(__cache->__parent_); - if (__cache->__left_ == nullptr) - return __cache; - return static_cast<__node_pointer>(std::__tree_leaf(__cache->__left_)); -} +}; +#endif template __tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t) { - if (this != std::addressof(__t)) { - value_comp() = __t.value_comp(); - __copy_assign_alloc(__t); - __assign_multi(__t.begin(), __t.end()); + if (this == std::addressof(__t)) + return *this; + + value_comp() = __t.value_comp(); + __copy_assign_alloc(__t); + + if (__size_ != 0) { + *__root_ptr() = 
static_cast<__node_base_pointer>(__copy_assign_tree(__root(), __t.__root())); + } else { + *__root_ptr() = static_cast<__node_base_pointer>(__copy_construct_tree(__t.__root())); + if (__root()) + __root()->__parent_ = __end_node(); } + __begin_node_ = + __end_node()->__left_ ? static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)) : __end_node(); + __size_ = __t.size(); + return *this; } -template -template -void __tree<_Tp, _Compare, _Allocator>::__assign_unique(_ForwardIterator __first, _ForwardIterator __last) { - typedef iterator_traits<_ForwardIterator> _ITraits; - typedef typename _ITraits::value_type _ItValueType; - static_assert( - is_same<_ItValueType, value_type>::value, "__assign_unique may only be called with the containers value type"); - static_assert( - __has_forward_iterator_category<_ForwardIterator>::value, "__assign_unique requires a forward iterator"); - if (size() != 0) { - _DetachedTreeCache __cache(this); - for (; __cache.__get() != nullptr && __first != __last; ++__first) { - if (__node_assign_unique(*__first, __cache.__get()).second) - __cache.__advance(); - } - } - for (; __first != __last; ++__first) - __emplace_unique(*__first); -} - -template -template -void __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _InputIterator __last) { - typedef iterator_traits<_InputIterator> _ITraits; - typedef typename _ITraits::value_type _ItValueType; - static_assert( - is_same<_ItValueType, value_type>::value, "__assign_multi may only be called with the containers value_type"); - if (size() != 0) { - _DetachedTreeCache __cache(this); - for (; __cache.__get() && __first != __last; ++__first) { - __assign_value(__cache.__get()->__value_, *__first); - __node_insert_multi(__cache.__get()); - __cache.__advance(); - } - } - const_iterator __e = end(); - for (; __first != __last; ++__first) - __emplace_hint_multi(__e, *__first); -} - template __tree<_Tp, _Compare, _Allocator>::__tree(const __tree& __t) - : 
__begin_node_(), + : __begin_node_(__end_node()), __node_alloc_(__node_traits::select_on_container_copy_construction(__t.__node_alloc())), __size_(0), __value_comp_(__t.value_comp()) { - __begin_node() = __end_node(); + if (__t.size() == 0) + return; + + *__root_ptr() = static_cast<__node_base_pointer>(__copy_construct_tree(__t.__root())); + __root()->__parent_ = __end_node(); + __begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)); + __size_ = __t.size(); } template @@ -1358,33 +1643,38 @@ __tree<_Tp, _Compare, _Allocator>::__tree(__tree&& __t) _NOEXCEPT_( __node_alloc_(std::move(__t.__node_alloc_)), __size_(__t.__size_), __value_comp_(std::move(__t.__value_comp_)) { - if (size() == 0) - __begin_node() = __end_node(); + if (__size_ == 0) + __begin_node_ = __end_node(); else { __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node()); - __t.__begin_node() = __t.__end_node(); + __t.__begin_node_ = __t.__end_node(); __t.__end_node()->__left_ = nullptr; - __t.size() = 0; + __t.__size_ = 0; } } template __tree<_Tp, _Compare, _Allocator>::__tree(__tree&& __t, const allocator_type& __a) - : __node_alloc_(__node_allocator(__a)), __size_(0), __value_comp_(std::move(__t.value_comp())) { + : __begin_node_(__end_node()), + __node_alloc_(__node_allocator(__a)), + __size_(0), + __value_comp_(std::move(__t.value_comp())) { + if (__t.size() == 0) + return; if (__a == __t.__alloc()) { - if (__t.size() == 0) - __begin_node() = __end_node(); - else { - __begin_node() = __t.__begin_node(); - __end_node()->__left_ = __t.__end_node()->__left_; - __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node()); - size() = __t.size(); - __t.__begin_node() = __t.__end_node(); - __t.__end_node()->__left_ = nullptr; - __t.size() = 0; - } + __begin_node_ = __t.__begin_node_; + __end_node()->__left_ = __t.__end_node()->__left_; + __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node()); + __size_ 
= __t.__size_; + __t.__begin_node_ = __t.__end_node(); + __t.__end_node()->__left_ = nullptr; + __t.__size_ = 0; } else { - __begin_node() = __end_node(); + *__root_ptr() = static_cast<__node_base_pointer>(__move_construct_tree(__t.__root())); + __root()->__parent_ = __end_node(); + __begin_node_ = static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)); + __size_ = __t.size(); + __t.clear(); // Ensure that __t is in a valid state after moving out the keys } } @@ -1397,61 +1687,33 @@ void __tree<_Tp, _Compare, _Allocator>::__move_assign(__tree& __t, true_type) __move_assign_alloc(__t); __size_ = __t.__size_; __value_comp_ = std::move(__t.__value_comp_); - if (size() == 0) - __begin_node() = __end_node(); + if (__size_ == 0) + __begin_node_ = __end_node(); else { __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node()); - __t.__begin_node() = __t.__end_node(); + __t.__begin_node_ = __t.__end_node(); __t.__end_node()->__left_ = nullptr; - __t.size() = 0; + __t.__size_ = 0; } } template void __tree<_Tp, _Compare, _Allocator>::__move_assign(__tree& __t, false_type) { - if (__node_alloc() == __t.__node_alloc()) + if (__node_alloc() == __t.__node_alloc()) { __move_assign(__t, true_type()); - else { - value_comp() = std::move(__t.value_comp()); - const_iterator __e = end(); - if (size() != 0) { - _DetachedTreeCache __cache(this); - while (__cache.__get() != nullptr && __t.size() != 0) { - __assign_value(__cache.__get()->__value_, std::move(__t.remove(__t.begin())->__value_)); - __node_insert_multi(__cache.__get()); - __cache.__advance(); - } + } else { + value_comp() = std::move(__t.value_comp()); + if (__size_ != 0) { + *__root_ptr() = static_cast<__node_base_pointer>(__move_assign_tree(__root(), __t.__root())); + } else { + *__root_ptr() = static_cast<__node_base_pointer>(__move_construct_tree(__t.__root())); + if (__root()) + __root()->__parent_ = __end_node(); } - while (__t.size() != 0) { - 
__insert_multi_from_orphaned_node(__e, std::move(__t.remove(__t.begin())->__value_)); - } - } -} - -template -__tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(__tree&& __t) - _NOEXCEPT_(is_nothrow_move_assignable::value && - ((__node_traits::propagate_on_container_move_assignment::value && - is_nothrow_move_assignable<__node_allocator>::value) || - allocator_traits<__node_allocator>::is_always_equal::value)) { - __move_assign(__t, integral_constant()); - return *this; -} - -template -__tree<_Tp, _Compare, _Allocator>::~__tree() { - static_assert(is_copy_constructible::value, "Comparator must be copy-constructible."); - destroy(__root()); -} - -template -void __tree<_Tp, _Compare, _Allocator>::destroy(__node_pointer __nd) _NOEXCEPT { - if (__nd != nullptr) { - destroy(static_cast<__node_pointer>(__nd->__left_)); - destroy(static_cast<__node_pointer>(__nd->__right_)); - __node_allocator& __na = __node_alloc(); - __node_traits::destroy(__na, std::addressof(__nd->__value_)); - __node_traits::deallocate(__na, __nd, 1); + __begin_node_ = + __end_node()->__left_ ? 
static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)) : __end_node(); + __size_ = __t.size(); + __t.clear(); // Ensure that __t is in a valid state after moving out the keys } } @@ -1470,12 +1732,12 @@ void __tree<_Tp, _Compare, _Allocator>::swap(__tree& __t) std::__swap_allocator(__node_alloc(), __t.__node_alloc()); swap(__size_, __t.__size_); swap(__value_comp_, __t.__value_comp_); - if (size() == 0) - __begin_node() = __end_node(); + if (__size_ == 0) + __begin_node_ = __end_node(); else __end_node()->__left_->__parent_ = __end_node(); - if (__t.size() == 0) - __t.__begin_node() = __t.__end_node(); + if (__t.__size_ == 0) + __t.__begin_node_ = __t.__end_node(); else __t.__end_node()->__left_->__parent_ = __t.__end_node(); } @@ -1483,8 +1745,8 @@ void __tree<_Tp, _Compare, _Allocator>::swap(__tree& __t) template void __tree<_Tp, _Compare, _Allocator>::clear() _NOEXCEPT { destroy(__root()); - size() = 0; - __begin_node() = __end_node(); + __size_ = 0; + __begin_node_ = __end_node(); __end_node()->__left_ = nullptr; } @@ -1497,7 +1759,7 @@ __tree<_Tp, _Compare, _Allocator>::__find_leaf_low(__end_node_pointer& __parent, __node_pointer __nd = __root(); if (__nd != nullptr) { while (true) { - if (value_comp()(__nd->__value_, __v)) { + if (value_comp()(__nd->__get_value(), __v)) { if (__nd->__right_ != nullptr) __nd = static_cast<__node_pointer>(__nd->__right_); else { @@ -1527,7 +1789,7 @@ __tree<_Tp, _Compare, _Allocator>::__find_leaf_high(__end_node_pointer& __parent __node_pointer __nd = __root(); if (__nd != nullptr) { while (true) { - if (value_comp()(__v, __nd->__value_)) { + if (value_comp()(__v, __nd->__get_value())) { if (__nd->__left_ != nullptr) __nd = static_cast<__node_pointer>(__nd->__left_); else { @@ -1578,92 +1840,91 @@ typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer& __tree<_Tp, _Co return __find_leaf_low(__parent, __v); } -// Find place to insert if __v doesn't exist -// Set __parent to parent of null leaf -// 
Return reference to null leaf -// If __v exists, set parent to node of __v and return reference to node of __v +// Find __v +// If __v exists, return the parent of the node of __v and a reference to the pointer to the node of __v. +// If __v doesn't exist, return the parent of the null leaf and a reference to the pointer to the null leaf. template template -typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer& -__tree<_Tp, _Compare, _Allocator>::__find_equal(__end_node_pointer& __parent, const _Key& __v) { - __node_pointer __nd = __root(); - __node_base_pointer* __nd_ptr = __root_ptr(); - if (__nd != nullptr) { - while (true) { - if (value_comp()(__v, __nd->__value_)) { - if (__nd->__left_ != nullptr) { - __nd_ptr = std::addressof(__nd->__left_); - __nd = static_cast<__node_pointer>(__nd->__left_); - } else { - __parent = static_cast<__end_node_pointer>(__nd); - return __parent->__left_; - } - } else if (value_comp()(__nd->__value_, __v)) { - if (__nd->__right_ != nullptr) { - __nd_ptr = std::addressof(__nd->__right_); - __nd = static_cast<__node_pointer>(__nd->__right_); - } else { - __parent = static_cast<__end_node_pointer>(__nd); - return __nd->__right_; - } - } else { - __parent = static_cast<__end_node_pointer>(__nd); - return *__nd_ptr; - } +_LIBCPP_HIDE_FROM_ABI pair::__end_node_pointer, + typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer&> +__tree<_Tp, _Compare, _Allocator>::__find_equal(const _Key& __v) { + using _Pair = pair<__end_node_pointer, __node_base_pointer&>; + + __node_pointer __nd = __root(); + + if (__nd == nullptr) { + auto __end = __end_node(); + return _Pair(__end, __end->__left_); + } + + __node_base_pointer* __node_ptr = __root_ptr(); + auto&& __transparent = std::__as_transparent<_Key>(value_comp()); + auto __comp = + __lazy_synth_three_way_comparator<__make_transparent_t<_Key, _Compare>, _Key, value_type>(__transparent); + + while (true) { + auto __comp_res = __comp(__v, __nd->__get_value()); + + if 
(__comp_res.__less()) { + if (__nd->__left_ == nullptr) + return _Pair(static_cast<__end_node_pointer>(__nd), __nd->__left_); + + __node_ptr = std::addressof(__nd->__left_); + __nd = static_cast<__node_pointer>(__nd->__left_); + } else if (__comp_res.__greater()) { + if (__nd->__right_ == nullptr) + return _Pair(static_cast<__end_node_pointer>(__nd), __nd->__right_); + + __node_ptr = std::addressof(__nd->__right_); + __nd = static_cast<__node_pointer>(__nd->__right_); + } else { + return _Pair(static_cast<__end_node_pointer>(__nd), *__node_ptr); } } - __parent = __end_node(); - return __parent->__left_; } -// Find place to insert if __v doesn't exist +// Find __v // First check prior to __hint. // Next check after __hint. // Next do O(log N) search. -// Set __parent to parent of null leaf -// Return reference to null leaf -// If __v exists, set parent to node of __v and return reference to node of __v +// If __v exists, return the parent of the node of __v and a reference to the pointer to the node of __v. +// If __v doesn't exist, return the parent of the null leaf and a reference to the pointer to the null leaf. 
template template -typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer& __tree<_Tp, _Compare, _Allocator>::__find_equal( - const_iterator __hint, __end_node_pointer& __parent, __node_base_pointer& __dummy, const _Key& __v) { - if (__hint == end() || value_comp()(__v, *__hint)) // check before - { +_LIBCPP_HIDE_FROM_ABI pair::__end_node_pointer, + typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer&> +__tree<_Tp, _Compare, _Allocator>::__find_equal(const_iterator __hint, __node_base_pointer& __dummy, const _Key& __v) { + using _Pair = pair<__end_node_pointer, __node_base_pointer&>; + + if (__hint == end() || value_comp()(__v, *__hint)) { // check before // __v < *__hint const_iterator __prior = __hint; if (__prior == begin() || value_comp()(*--__prior, __v)) { // *prev(__hint) < __v < *__hint - if (__hint.__ptr_->__left_ == nullptr) { - __parent = __hint.__ptr_; - return __parent->__left_; - } else { - __parent = __prior.__ptr_; - return static_cast<__node_base_pointer>(__prior.__ptr_)->__right_; - } + if (__hint.__ptr_->__left_ == nullptr) + return _Pair(__hint.__ptr_, __hint.__ptr_->__left_); + return _Pair(__prior.__ptr_, static_cast<__node_pointer>(__prior.__ptr_)->__right_); } // __v <= *prev(__hint) - return __find_equal(__parent, __v); - } else if (value_comp()(*__hint, __v)) // check after - { + return __find_equal(__v); + } + + if (value_comp()(*__hint, __v)) { // check after // *__hint < __v const_iterator __next = std::next(__hint); if (__next == end() || value_comp()(__v, *__next)) { // *__hint < __v < *std::next(__hint) - if (__hint.__get_np()->__right_ == nullptr) { - __parent = __hint.__ptr_; - return static_cast<__node_base_pointer>(__hint.__ptr_)->__right_; - } else { - __parent = __next.__ptr_; - return __parent->__left_; - } + if (__hint.__get_np()->__right_ == nullptr) + return _Pair(__hint.__ptr_, static_cast<__node_pointer>(__hint.__ptr_)->__right_); + return _Pair(__next.__ptr_, __next.__ptr_->__left_); } // 
*next(__hint) <= __v - return __find_equal(__parent, __v); + return __find_equal(__v); } + // else __v == *__hint - __parent = __hint.__ptr_; - __dummy = static_cast<__node_base_pointer>(__hint.__ptr_); - return __dummy; + __dummy = static_cast<__node_base_pointer>(__hint.__ptr_); + return _Pair(__hint.__ptr_, __dummy); } template @@ -1674,46 +1935,10 @@ void __tree<_Tp, _Compare, _Allocator>::__insert_node_at( __new_node->__parent_ = __parent; // __new_node->__is_black_ is initialized in __tree_balance_after_insert __child = __new_node; - if (__begin_node()->__left_ != nullptr) - __begin_node() = static_cast<__end_node_pointer>(__begin_node()->__left_); + if (__begin_node_->__left_ != nullptr) + __begin_node_ = static_cast<__end_node_pointer>(__begin_node_->__left_); std::__tree_balance_after_insert(__end_node()->__left_, __child); - ++size(); -} - -template -template -pair::iterator, bool> -__tree<_Tp, _Compare, _Allocator>::__emplace_unique_key_args(_Key const& __k, _Args&&... __args) { - __end_node_pointer __parent; - __node_base_pointer& __child = __find_equal(__parent, __k); - __node_pointer __r = static_cast<__node_pointer>(__child); - bool __inserted = false; - if (__child == nullptr) { - __node_holder __h = __construct_node(std::forward<_Args>(__args)...); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); - __r = __h.release(); - __inserted = true; - } - return pair(iterator(__r), __inserted); -} - -template -template -pair::iterator, bool> -__tree<_Tp, _Compare, _Allocator>::__emplace_hint_unique_key_args( - const_iterator __p, _Key const& __k, _Args&&... 
__args) { - __end_node_pointer __parent; - __node_base_pointer __dummy; - __node_base_pointer& __child = __find_equal(__p, __parent, __dummy, __k); - __node_pointer __r = static_cast<__node_pointer>(__child); - bool __inserted = false; - if (__child == nullptr) { - __node_holder __h = __construct_node(std::forward<_Args>(__args)...); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); - __r = __h.release(); - __inserted = true; - } - return pair(iterator(__r), __inserted); + ++__size_; } template @@ -1722,51 +1947,18 @@ typename __tree<_Tp, _Compare, _Allocator>::__node_holder __tree<_Tp, _Compare, _Allocator>::__construct_node(_Args&&... __args) { __node_allocator& __na = __node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, std::addressof(__h->__value_), std::forward<_Args>(__args)...); + std::__construct_at(std::addressof(*__h), __na, std::forward<_Args>(__args)...); __h.get_deleter().__value_constructed = true; return __h; } -template -template -pair::iterator, bool> -__tree<_Tp, _Compare, _Allocator>::__emplace_unique_impl(_Args&&... __args) { - __node_holder __h = __construct_node(std::forward<_Args>(__args)...); - __end_node_pointer __parent; - __node_base_pointer& __child = __find_equal(__parent, __h->__value_); - __node_pointer __r = static_cast<__node_pointer>(__child); - bool __inserted = false; - if (__child == nullptr) { - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); - __r = __h.release(); - __inserted = true; - } - return pair(iterator(__r), __inserted); -} - -template -template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__emplace_hint_unique_impl(const_iterator __p, _Args&&... 
__args) { - __node_holder __h = __construct_node(std::forward<_Args>(__args)...); - __end_node_pointer __parent; - __node_base_pointer __dummy; - __node_base_pointer& __child = __find_equal(__p, __parent, __dummy, __h->__value_); - __node_pointer __r = static_cast<__node_pointer>(__child); - if (__child == nullptr) { - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); - __r = __h.release(); - } - return iterator(__r); -} - template template typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__emplace_multi(_Args&&... __args) { __node_holder __h = __construct_node(std::forward<_Args>(__args)...); __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf_high(__parent, __h->__value_); + __node_base_pointer& __child = __find_leaf_high(__parent, __h->__get_value()); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); return iterator(static_cast<__node_pointer>(__h.release())); } @@ -1777,53 +1969,19 @@ typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__emplace_hint_multi(const_iterator __p, _Args&&... 
__args) { __node_holder __h = __construct_node(std::forward<_Args>(__args)...); __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf(__p, __parent, __h->__value_); + __node_base_pointer& __child = __find_leaf(__p, __parent, __h->__get_value()); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); return iterator(static_cast<__node_pointer>(__h.release())); } -template -pair::iterator, bool> -__tree<_Tp, _Compare, _Allocator>::__node_assign_unique(const value_type& __v, __node_pointer __nd) { - __end_node_pointer __parent; - __node_base_pointer& __child = __find_equal(__parent, __v); - __node_pointer __r = static_cast<__node_pointer>(__child); - bool __inserted = false; - if (__child == nullptr) { - __assign_value(__nd->__value_, __v); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - __r = __nd; - __inserted = true; - } - return pair(iterator(__r), __inserted); -} - -template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__node_insert_multi(__node_pointer __nd) { - __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf_high(__parent, __nd->__value_); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - return iterator(__nd); -} - -template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__node_insert_multi(const_iterator __p, __node_pointer __nd) { - __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf(__p, __parent, __nd->__value_); - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - return iterator(__nd); -} - template typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__remove_node_pointer(__node_pointer __ptr) _NOEXCEPT { iterator __r(__ptr); ++__r; - if (__begin_node() == __ptr) - __begin_node() = __r.__ptr_; - --size(); + if (__begin_node_ == __ptr) + 
__begin_node_ = __r.__ptr_; + --__size_; std::__tree_remove(__end_node()->__left_, static_cast<__node_base_pointer>(__ptr)); return __r; } @@ -1837,8 +1995,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique(_NodeHandle&& __n return _InsertReturnType{end(), false, _NodeHandle()}; __node_pointer __ptr = __nh.__ptr_; - __end_node_pointer __parent; - __node_base_pointer& __child = __find_equal(__parent, __ptr->__value_); + auto [__parent, __child] = __find_equal(__ptr->__get_value()); if (__child != nullptr) return _InsertReturnType{iterator(static_cast<__node_pointer>(__child)), false, std::move(__nh)}; @@ -1855,10 +2012,9 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique(const_iterator __ return end(); __node_pointer __ptr = __nh.__ptr_; - __end_node_pointer __parent; __node_base_pointer __dummy; - __node_base_pointer& __child = __find_equal(__hint, __parent, __dummy, __ptr->__value_); - __node_pointer __r = static_cast<__node_pointer>(__child); + auto [__parent, __child] = __find_equal(__hint, __dummy, __ptr->__get_value()); + __node_pointer __r = static_cast<__node_pointer>(__child); if (__child == nullptr) { __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); __r = __ptr; @@ -1885,14 +2041,12 @@ _LIBCPP_HIDE_FROM_ABI _NodeHandle __tree<_Tp, _Compare, _Allocator>::__node_hand } template -template -_LIBCPP_HIDE_FROM_ABI void __tree<_Tp, _Compare, _Allocator>::__node_handle_merge_unique(_Tree& __source) { - static_assert(is_same::value, ""); - - for (typename _Tree::iterator __i = __source.begin(); __i != __source.end();) { +template +_LIBCPP_HIDE_FROM_ABI void +__tree<_Tp, _Compare, _Allocator>::__node_handle_merge_unique(__tree<_Tp, _Comp2, _Allocator>& __source) { + for (iterator __i = __source.begin(); __i != __source.end();) { __node_pointer __src_ptr = __i.__get_np(); - __end_node_pointer __parent; - __node_base_pointer& __child = __find_equal(__parent, __src_ptr->__value_); + auto [__parent, __child] = 
__find_equal(__src_ptr->__get_value()); ++__i; if (__child != nullptr) continue; @@ -1909,7 +2063,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi(_NodeHandle&& __nh return end(); __node_pointer __ptr = __nh.__ptr_; __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf_high(__parent, __ptr->__value_); + __node_base_pointer& __child = __find_leaf_high(__parent, __ptr->__get_value()); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); __nh.__release_ptr(); return iterator(__ptr); @@ -1924,21 +2078,20 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi(const_iterator __h __node_pointer __ptr = __nh.__ptr_; __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf(__hint, __parent, __ptr->__value_); + __node_base_pointer& __child = __find_leaf(__hint, __parent, __ptr->__get_value()); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); __nh.__release_ptr(); return iterator(__ptr); } template -template -_LIBCPP_HIDE_FROM_ABI void __tree<_Tp, _Compare, _Allocator>::__node_handle_merge_multi(_Tree& __source) { - static_assert(is_same::value, ""); - - for (typename _Tree::iterator __i = __source.begin(); __i != __source.end();) { +template +_LIBCPP_HIDE_FROM_ABI void +__tree<_Tp, _Compare, _Allocator>::__node_handle_merge_multi(__tree<_Tp, _Comp2, _Allocator>& __source) { + for (iterator __i = __source.begin(); __i != __source.end();) { __node_pointer __src_ptr = __i.__get_np(); __end_node_pointer __parent; - __node_base_pointer& __child = __find_leaf_high(__parent, __src_ptr->__value_); + __node_base_pointer& __child = __find_leaf_high(__parent, __src_ptr->__get_value()); ++__i; __source.__remove_node_pointer(__src_ptr); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__src_ptr)); @@ -1987,34 +2140,17 @@ __tree<_Tp, _Compare, _Allocator>::__erase_multi(const _Key& __k) { return __r; } -template -template -typename __tree<_Tp, 
_Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::find(const _Key& __v) { - iterator __p = __lower_bound(__v, __root(), __end_node()); - if (__p != end() && !value_comp()(__v, *__p)) - return __p; - return end(); -} - -template -template -typename __tree<_Tp, _Compare, _Allocator>::const_iterator -__tree<_Tp, _Compare, _Allocator>::find(const _Key& __v) const { - const_iterator __p = __lower_bound(__v, __root(), __end_node()); - if (__p != end() && !value_comp()(__v, *__p)) - return __p; - return end(); -} - template template typename __tree<_Tp, _Compare, _Allocator>::size_type __tree<_Tp, _Compare, _Allocator>::__count_unique(const _Key& __k) const { __node_pointer __rt = __root(); + auto __comp = __lazy_synth_three_way_comparator(value_comp()); while (__rt != nullptr) { - if (value_comp()(__k, __rt->__value_)) { + auto __comp_res = __comp(__k, __rt->__get_value()); + if (__comp_res.__less()) { __rt = static_cast<__node_pointer>(__rt->__left_); - } else if (value_comp()(__rt->__value_, __k)) + } else if (__comp_res.__greater()) __rt = static_cast<__node_pointer>(__rt->__right_); else return 1; @@ -2028,26 +2164,28 @@ typename __tree<_Tp, _Compare, _Allocator>::size_type __tree<_Tp, _Compare, _Allocator>::__count_multi(const _Key& __k) const { __end_node_pointer __result = __end_node(); __node_pointer __rt = __root(); + auto __comp = __lazy_synth_three_way_comparator(value_comp()); while (__rt != nullptr) { - if (value_comp()(__k, __rt->__value_)) { + auto __comp_res = __comp(__k, __rt->__get_value()); + if (__comp_res.__less()) { __result = static_cast<__end_node_pointer>(__rt); __rt = static_cast<__node_pointer>(__rt->__left_); - } else if (value_comp()(__rt->__value_, __k)) + } else if (__comp_res.__greater()) __rt = static_cast<__node_pointer>(__rt->__right_); else return std::distance( - __lower_bound(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)), - __upper_bound(__k, 
static_cast<__node_pointer>(__rt->__right_), __result)); + __lower_bound_multi(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)), + __upper_bound_multi(__k, static_cast<__node_pointer>(__rt->__right_), __result)); } return 0; } template template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__lower_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) { +typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__lower_bound_multi( + const _Key& __v, __node_pointer __root, __end_node_pointer __result) { while (__root != nullptr) { - if (!value_comp()(__root->__value_, __v)) { + if (!value_comp()(__root->__get_value(), __v)) { __result = static_cast<__end_node_pointer>(__root); __root = static_cast<__node_pointer>(__root->__left_); } else @@ -2058,10 +2196,10 @@ __tree<_Tp, _Compare, _Allocator>::__lower_bound(const _Key& __v, __node_pointer template template -typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__lower_bound( +typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__lower_bound_multi( const _Key& __v, __node_pointer __root, __end_node_pointer __result) const { while (__root != nullptr) { - if (!value_comp()(__root->__value_, __v)) { + if (!value_comp()(__root->__get_value(), __v)) { __result = static_cast<__end_node_pointer>(__root); __root = static_cast<__node_pointer>(__root->__left_); } else @@ -2072,10 +2210,10 @@ typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, template template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__upper_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) { +typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__upper_bound_multi( + const _Key& __v, __node_pointer __root, 
__end_node_pointer __result) { while (__root != nullptr) { - if (value_comp()(__v, __root->__value_)) { + if (value_comp()(__v, __root->__get_value())) { __result = static_cast<__end_node_pointer>(__root); __root = static_cast<__node_pointer>(__root->__left_); } else @@ -2086,10 +2224,10 @@ __tree<_Tp, _Compare, _Allocator>::__upper_bound(const _Key& __v, __node_pointer template template -typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__upper_bound( +typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__upper_bound_multi( const _Key& __v, __node_pointer __root, __end_node_pointer __result) const { while (__root != nullptr) { - if (value_comp()(__v, __root->__value_)) { + if (value_comp()(__v, __root->__get_value())) { __result = static_cast<__end_node_pointer>(__root); __root = static_cast<__node_pointer>(__root->__left_); } else @@ -2102,14 +2240,16 @@ template template pair::iterator, typename __tree<_Tp, _Compare, _Allocator>::iterator> __tree<_Tp, _Compare, _Allocator>::__equal_range_unique(const _Key& __k) { - typedef pair _Pp; + using _Pp = pair; __end_node_pointer __result = __end_node(); __node_pointer __rt = __root(); + auto __comp = __lazy_synth_three_way_comparator(value_comp()); while (__rt != nullptr) { - if (value_comp()(__k, __rt->__value_)) { + auto __comp_res = __comp(__k, __rt->__get_value()); + if (__comp_res.__less()) { __result = static_cast<__end_node_pointer>(__rt); __rt = static_cast<__node_pointer>(__rt->__left_); - } else if (value_comp()(__rt->__value_, __k)) + } else if (__comp_res.__greater()) __rt = static_cast<__node_pointer>(__rt->__right_); else return _Pp(iterator(__rt), @@ -2124,14 +2264,16 @@ template pair::const_iterator, typename __tree<_Tp, _Compare, _Allocator>::const_iterator> __tree<_Tp, _Compare, _Allocator>::__equal_range_unique(const _Key& __k) const { - typedef pair _Pp; + using _Pp = pair; __end_node_pointer __result = 
__end_node(); __node_pointer __rt = __root(); + auto __comp = __lazy_synth_three_way_comparator(value_comp()); while (__rt != nullptr) { - if (value_comp()(__k, __rt->__value_)) { + auto __comp_res = __comp(__k, __rt->__get_value()); + if (__comp_res.__less()) { __result = static_cast<__end_node_pointer>(__rt); __rt = static_cast<__node_pointer>(__rt->__left_); - } else if (value_comp()(__rt->__value_, __k)) + } else if (__comp_res.__greater()) __rt = static_cast<__node_pointer>(__rt->__right_); else return _Pp( @@ -2146,18 +2288,21 @@ template template pair::iterator, typename __tree<_Tp, _Compare, _Allocator>::iterator> __tree<_Tp, _Compare, _Allocator>::__equal_range_multi(const _Key& __k) { - typedef pair _Pp; + using _Pp = pair; __end_node_pointer __result = __end_node(); - __node_pointer __rt = __root(); + __node_pointer __rt = __root(); + auto __comp = __lazy_synth_three_way_comparator(value_comp()); while (__rt != nullptr) { - if (value_comp()(__k, __rt->__value_)) { + auto __comp_res = __comp(__k, __rt->__get_value()); + if (__comp_res.__less()) { __result = static_cast<__end_node_pointer>(__rt); __rt = static_cast<__node_pointer>(__rt->__left_); - } else if (value_comp()(__rt->__value_, __k)) + } else if (__comp_res.__greater()) __rt = static_cast<__node_pointer>(__rt->__right_); else - return _Pp(__lower_bound(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)), - __upper_bound(__k, static_cast<__node_pointer>(__rt->__right_), __result)); + return _Pp( + __lower_bound_multi(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)), + __upper_bound_multi(__k, static_cast<__node_pointer>(__rt->__right_), __result)); } return _Pp(iterator(__result), iterator(__result)); } @@ -2167,18 +2312,21 @@ template pair::const_iterator, typename __tree<_Tp, _Compare, _Allocator>::const_iterator> __tree<_Tp, _Compare, _Allocator>::__equal_range_multi(const _Key& __k) const { - typedef pair _Pp; + using 
_Pp = pair; __end_node_pointer __result = __end_node(); - __node_pointer __rt = __root(); + __node_pointer __rt = __root(); + auto __comp = __lazy_synth_three_way_comparator(value_comp()); while (__rt != nullptr) { - if (value_comp()(__k, __rt->__value_)) { + auto __comp_res = __comp(__k, __rt->__get_value()); + if (__comp_res.__less()) { __result = static_cast<__end_node_pointer>(__rt); __rt = static_cast<__node_pointer>(__rt->__left_); - } else if (value_comp()(__rt->__value_, __k)) + } else if (__comp_res.__greater()) __rt = static_cast<__node_pointer>(__rt->__right_); else - return _Pp(__lower_bound(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)), - __upper_bound(__k, static_cast<__node_pointer>(__rt->__right_), __result)); + return _Pp( + __lower_bound_multi(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)), + __upper_bound_multi(__k, static_cast<__node_pointer>(__rt->__right_), __result)); } return _Pp(const_iterator(__result), const_iterator(__result)); } @@ -2187,13 +2335,13 @@ template typename __tree<_Tp, _Compare, _Allocator>::__node_holder __tree<_Tp, _Compare, _Allocator>::remove(const_iterator __p) _NOEXCEPT { __node_pointer __np = __p.__get_np(); - if (__begin_node() == __p.__ptr_) { + if (__begin_node_ == __p.__ptr_) { if (__np->__right_ != nullptr) - __begin_node() = static_cast<__end_node_pointer>(__np->__right_); + __begin_node_ = static_cast<__end_node_pointer>(__np->__right_); else - __begin_node() = static_cast<__end_node_pointer>(__np->__parent_); + __begin_node_ = static_cast<__end_node_pointer>(__np->__parent_); } - --size(); + --__size_; std::__tree_remove(__end_node()->__left_, static_cast<__node_base_pointer>(__np)); return __node_holder(__np, _Dp(__node_alloc(), true)); } diff --git a/lib/libcxx/include/__tuple/sfinae_helpers.h b/lib/libcxx/include/__tuple/sfinae_helpers.h index 9fe5e84e2f..f81048f406 100644 --- a/lib/libcxx/include/__tuple/sfinae_helpers.h 
+++ b/lib/libcxx/include/__tuple/sfinae_helpers.h @@ -10,20 +10,6 @@ #define _LIBCPP___TUPLE_SFINAE_HELPERS_H #include <__config> -#include <__cstddef/size_t.h> -#include <__fwd/tuple.h> -#include <__tuple/make_tuple_types.h> -#include <__tuple/tuple_element.h> -#include <__tuple/tuple_like_ext.h> -#include <__tuple/tuple_size.h> -#include <__tuple/tuple_types.h> -#include <__type_traits/conjunction.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> -#include <__type_traits/is_constructible.h> -#include <__type_traits/is_same.h> -#include <__type_traits/remove_cvref.h> -#include <__type_traits/remove_reference.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -33,36 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #ifndef _LIBCPP_CXX03_LANG -struct __tuple_sfinae_base { - template