From 75838d9e8be67b0f707bdb372abe71063a79100a Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Sun, 27 Nov 2022 18:32:29 +0000
Subject: [PATCH] Implement more llvm simd intrinsics for AArch64

Add lowering for the following NEON intrinsics to
codegen_aarch64_llvm_intrinsic_call:

- llvm.aarch64.neon.rbit.v*: `bitrev` is applied to every lane through
  simd_for_each_lane.
- llvm.aarch64.neon.{smax,umax,smin,umin}.v*: each pair of lanes is
  compared with `icmp` and the result is picked with `select`, through
  simd_pair_for_each_lane.
- llvm.aarch64.neon.{smaxv,umaxv,sminv,uminv}.i*: the same
  `icmp` + `select` step is used as the combining function passed to
  simd_reduce.

The signed variants compare with IntCC::SignedGreaterThan /
IntCC::SignedLessThan, the unsigned variants with
IntCC::UnsignedGreaterThan / IntCC::UnsignedLessThan.
---
 src/intrinsics/llvm_aarch64.rs | 80 ++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs
index f9f34e16aec1..1daf14288243 100644
--- a/src/intrinsics/llvm_aarch64.rs
+++ b/src/intrinsics/llvm_aarch64.rs
@@ -32,6 +32,14 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             });
         }
 
+        _ if intrinsic.starts_with("llvm.aarch64.neon.rbit.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().bitrev(lane)
+            });
+        }
+
         _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => {
             intrinsic_args!(fx, args => (x, y); intrinsic);
 
@@ -48,6 +56,78 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             });
         }
 
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smax.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                let gt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane);
+                fx.bcx.ins().select(gt, x_lane, y_lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umax.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                let gt = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane);
+                fx.bcx.ins().select(gt, x_lane, y_lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let gt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umaxv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let gt = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smin.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                let lt = fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane);
+                fx.bcx.ins().select(lt, x_lane, y_lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umin.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                let lt = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane);
+                fx.bcx.ins().select(lt, x_lane, y_lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sminv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let lt = fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b);
+                fx.bcx.ins().select(lt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.uminv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let lt = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b);
+                fx.bcx.ins().select(lt, a, b)
+            });
+        }
+
         /*
         _ if intrinsic.starts_with("llvm.aarch64.neon.sshl.v")
             || intrinsic.starts_with("llvm.aarch64.neon.sqshl.v")