From d89f307ea251d78df4457140cdc23f335940885c Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Mon, 18 Nov 2019 20:58:33 +0100 Subject: [PATCH] Move simd intrinsics to intrinsics/simd.rs --- src/intrinsics/mod.rs | 246 +++++++++-------------------------------- src/intrinsics/simd.rs | 171 ++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 196 deletions(-) create mode 100644 src/intrinsics/simd.rs diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 7e4144d29e10..ca10399cb9cb 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -1,4 +1,5 @@ pub mod llvm; +mod simd; use crate::prelude::*; @@ -70,7 +71,7 @@ } } -macro_rules! call_intrinsic_match { +macro call_intrinsic_match { ($fx:expr, $intrinsic:expr, $substs:expr, $ret:expr, $destination:expr, $args:expr, $( $name:ident($($arg:ident),*) -> $ty:ident => $func:ident, )*) => { @@ -102,32 +103,28 @@ macro_rules! call_intrinsic_match { } } -macro_rules! atomic_binop_return_old { - ($fx:expr, $op:ident<$T:ident>($ptr:ident, $src:ident) -> $ret:ident) => { - let clif_ty = $fx.clif_type($T).unwrap(); - let old = $fx.bcx.ins().load(clif_ty, MemFlags::new(), $ptr, 0); - let new = $fx.bcx.ins().$op(old, $src); - $fx.bcx.ins().store(MemFlags::new(), new, $ptr, 0); - $ret.write_cvalue($fx, CValue::by_val(old, $fx.layout_of($T))); - }; +macro atomic_binop_return_old($fx:expr, $op:ident<$T:ident>($ptr:ident, $src:ident) -> $ret:ident) { + let clif_ty = $fx.clif_type($T).unwrap(); + let old = $fx.bcx.ins().load(clif_ty, MemFlags::new(), $ptr, 0); + let new = $fx.bcx.ins().$op(old, $src); + $fx.bcx.ins().store(MemFlags::new(), new, $ptr, 0); + $ret.write_cvalue($fx, CValue::by_val(old, $fx.layout_of($T))); } -macro_rules! 
atomic_minmax { - ($fx:expr, $cc:expr, <$T:ident> ($ptr:ident, $src:ident) -> $ret:ident) => { - // Read old - let clif_ty = $fx.clif_type($T).unwrap(); - let old = $fx.bcx.ins().load(clif_ty, MemFlags::new(), $ptr, 0); +macro atomic_minmax($fx:expr, $cc:expr, <$T:ident> ($ptr:ident, $src:ident) -> $ret:ident) { + // Read old + let clif_ty = $fx.clif_type($T).unwrap(); + let old = $fx.bcx.ins().load(clif_ty, MemFlags::new(), $ptr, 0); - // Compare - let is_eq = codegen_icmp($fx, IntCC::SignedGreaterThan, old, $src); - let new = $fx.bcx.ins().select(is_eq, old, $src); + // Compare + let is_eq = codegen_icmp($fx, IntCC::SignedGreaterThan, old, $src); + let new = $fx.bcx.ins().select(is_eq, old, $src); - // Write new - $fx.bcx.ins().store(MemFlags::new(), new, $ptr, 0); + // Write new + $fx.bcx.ins().store(MemFlags::new(), new, $ptr, 0); - let ret_val = CValue::by_val(old, $ret.layout()); - $ret.write_cvalue($fx, ret_val); - }; + let ret_val = CValue::by_val(old, $ret.layout()); + $ret.write_cvalue($fx, ret_val); } fn lane_type_and_count<'tcx>( @@ -206,7 +203,7 @@ fn bool_to_zero_or_max_uint<'tcx>( CValue::by_val(res, layout) } -macro_rules! simd_cmp { +macro simd_cmp { ($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane( $fx, @@ -222,7 +219,7 @@ macro_rules! simd_cmp { bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) }, ); - }; + }, ($fx:expr, $intrinsic:expr, $cc_u:ident|$cc_s:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane( $fx, @@ -239,10 +236,10 @@ macro_rules! simd_cmp { bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane) }, ); - }; + }, } -macro_rules! simd_int_binop { +macro simd_int_binop { ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane( $fx, @@ -258,7 +255,7 @@ macro_rules! 
simd_int_binop { CValue::by_val(res_lane, ret_lane_layout) }, ); - }; + }, ($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane( $fx, @@ -275,10 +272,10 @@ macro_rules! simd_int_binop { CValue::by_val(res_lane, ret_lane_layout) }, ); - }; + }, } -macro_rules! simd_int_flt_binop { +macro simd_int_flt_binop { ($fx:expr, $intrinsic:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane( $fx, @@ -295,7 +292,7 @@ macro_rules! simd_int_flt_binop { CValue::by_val(res_lane, ret_lane_layout) }, ); - }; + }, ($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => { simd_for_each_lane( $fx, @@ -313,26 +310,24 @@ macro_rules! simd_int_flt_binop { CValue::by_val(res_lane, ret_lane_layout) }, ); - }; + }, } -macro_rules! simd_flt_binop { - ($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => { - simd_for_each_lane( - $fx, - $intrinsic, - $x, - $y, - $ret, - |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { - let res_lane = match lane_layout.ty.kind { - ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), - _ => unreachable!("{:?}", lane_layout.ty), - }; - CValue::by_val(res_lane, ret_lane_layout) - }, - ); - }; +macro simd_flt_binop($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) { + simd_for_each_lane( + $fx, + $intrinsic, + $x, + $y, + $ret, + |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind { + ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane), + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }, + ); } pub fn codegen_intrinsic_call<'tcx>( @@ -371,6 +366,13 @@ pub fn codegen_intrinsic_call<'tcx>( } }; + if intrinsic.starts_with("simd_") { + self::simd::codegen_simd_intrinsic_call(fx, instance, args, ret, span); + let ret_ebb = fx.get_ebb(destination.expect("SIMD intrinsics don't diverge").1); + 
fx.bcx.ins().jump(ret_ebb, &[]); + return; + } + let usize_layout = fx.layout_of(fx.tcx.types.usize); call_intrinsic_match! { @@ -944,154 +946,6 @@ fn swap(bcx: &mut FunctionBuilder, v: Value) -> Value { ret.write_cvalue(fx, val); }; - simd_cast, (c a) { - let (lane_layout, lane_count) = lane_type_and_count(fx, a.layout(), intrinsic); - let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx, ret.layout(), intrinsic); - assert_eq!(lane_count, ret_lane_count); - - let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap(); - - let from_signed = type_sign(lane_layout.ty); - let to_signed = type_sign(ret_lane_layout.ty); - - for lane in 0..lane_count { - let lane = mir::Field::new(lane.try_into().unwrap()); - - let a_lane = a.value_field(fx, lane).load_scalar(fx); - let res = clif_int_or_float_cast(fx, a_lane, from_signed, ret_lane_ty, to_signed); - ret.place_field(fx, lane).write_cvalue(fx, CValue::by_val(res, ret_lane_layout)); - } - }; - - simd_eq, (c x, c y) { - simd_cmp!(fx, intrinsic, Equal(x, y) -> ret); - }; - simd_ne, (c x, c y) { - simd_cmp!(fx, intrinsic, NotEqual(x, y) -> ret); - }; - simd_lt, (c x, c y) { - simd_cmp!(fx, intrinsic, UnsignedLessThan|SignedLessThan(x, y) -> ret); - }; - simd_le, (c x, c y) { - simd_cmp!(fx, intrinsic, UnsignedLessThanOrEqual|SignedLessThanOrEqual(x, y) -> ret); - }; - simd_gt, (c x, c y) { - simd_cmp!(fx, intrinsic, UnsignedGreaterThan|SignedGreaterThan(x, y) -> ret); - }; - simd_ge, (c x, c y) { - simd_cmp!(fx, intrinsic, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual(x, y) -> ret); - }; - - // simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U - _ if intrinsic.starts_with("simd_shuffle"), (c x, c y, o idx) { - let n: u32 = intrinsic["simd_shuffle".len()..].parse().unwrap(); - - assert_eq!(x.layout(), y.layout()); - let layout = x.layout(); - - let (lane_type, lane_count) = lane_type_and_count(fx, layout, intrinsic); - let (ret_lane_type, ret_lane_count) = lane_type_and_count(fx, ret.layout(), intrinsic); - - 
assert_eq!(lane_type, ret_lane_type); - assert_eq!(n, ret_lane_count); - - let total_len = lane_count * 2; - - let indexes = { - use rustc::mir::interpret::*; - let idx_const = crate::constant::mir_operand_get_const_val(fx, idx).expect("simd_shuffle* idx not const"); - - let idx_bytes = match idx_const.val { - ty::ConstKind::Value(ConstValue::ByRef { alloc, offset }) => { - let ptr = Pointer::new(AllocId(0 /* dummy */), offset); - let size = Size::from_bytes(4 * u64::from(ret_lane_count) /* size_of([u32; ret_lane_count]) */); - alloc.get_bytes(fx, ptr, size).unwrap() - } - _ => unreachable!("{:?}", idx_const), - }; - - (0..ret_lane_count).map(|i| { - let i = usize::try_from(i).unwrap(); - let idx = rustc::mir::interpret::read_target_uint( - fx.tcx.data_layout.endian, - &idx_bytes[4*i.. 4*i + 4], - ).expect("read_target_uint"); - u32::try_from(idx).expect("try_from u32") - }).collect::<Vec<u32>>() - }; - - for &idx in &indexes { - assert!(idx < total_len, "idx {} out of range 0..{}", idx, total_len); - } - - for (out_idx, in_idx) in indexes.into_iter().enumerate() { - let in_lane = if in_idx < lane_count { - x.value_field(fx, mir::Field::new(in_idx.try_into().unwrap())) - } else { - y.value_field(fx, mir::Field::new((in_idx - lane_count).try_into().unwrap())) - }; - let out_lane = ret.place_field(fx, mir::Field::new(out_idx)); - out_lane.write_cvalue(fx, in_lane); - } - }; - - simd_extract, (c v, o idx) { - let idx_const = if let Some(idx_const) = crate::constant::mir_operand_get_const_val(fx, idx) { - idx_const - } else { - fx.tcx.sess.span_warn( - fx.mir.span, - "`#[rustc_arg_required_const(..)]` is not yet supported. 
Calling this function will panic.", - ); - crate::trap::trap_panic(fx, "`#[rustc_arg_required_const(..)]` is not yet supported."); - return; - }; - - let idx = idx_const.val.try_to_bits(Size::from_bytes(4 /* u32*/)).expect(&format!("kind not scalar: {:?}", idx_const)); - let (_lane_type, lane_count) = lane_type_and_count(fx, v.layout(), intrinsic); - if idx >= lane_count.into() { - fx.tcx.sess.span_fatal(fx.mir.span, &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count)); - } - - let ret_lane = v.value_field(fx, mir::Field::new(idx.try_into().unwrap())); - ret.write_cvalue(fx, ret_lane); - }; - - simd_add, (c x, c y) { - simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret); - }; - simd_sub, (c x, c y) { - simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret); - }; - simd_mul, (c x, c y) { - simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret); - }; - simd_div, (c x, c y) { - simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret); - }; - simd_shl, (c x, c y) { - simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret); - }; - simd_shr, (c x, c y) { - simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret); - }; - simd_and, (c x, c y) { - simd_int_binop!(fx, intrinsic, band(x, y) -> ret); - }; - simd_or, (c x, c y) { - simd_int_binop!(fx, intrinsic, bor(x, y) -> ret); - }; - simd_xor, (c x, c y) { - simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret); - }; - - simd_fmin, (c x, c y) { - simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret); - }; - simd_fmax, (c x, c y) { - simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret); - }; - try, (v f, v data, v _local_ptr) { // FIXME once unwinding is supported, change this to actually catch panics let f_sig = fx.bcx.func.import_signature(Signature { diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs new file mode 100644 index 000000000000..2b6447de894f --- /dev/null +++ b/src/intrinsics/simd.rs @@ -0,0 +1,171 @@ +use crate::prelude::*; +use super::*; + +pub fn 
codegen_simd_intrinsic_call<'tcx>( + fx: &mut FunctionCx<'_, 'tcx, impl Backend>, + instance: Instance<'tcx>, + args: &[mir::Operand<'tcx>], + ret: CPlace<'tcx>, + span: Span, +) { + let def_id = instance.def_id(); + let substs = instance.substs; + + let intrinsic = fx.tcx.item_name(def_id).as_str(); + let intrinsic = &intrinsic[..]; + + intrinsic_match! { + fx, intrinsic, substs, args, + _ => { + fx.tcx.sess.fatal(&format!("Unknown SIMD intrinsic {}", intrinsic)); + }; + + simd_cast, (c a) { + let (lane_layout, lane_count) = lane_type_and_count(fx, a.layout(), intrinsic); + let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx, ret.layout(), intrinsic); + assert_eq!(lane_count, ret_lane_count); + + let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap(); + + let from_signed = type_sign(lane_layout.ty); + let to_signed = type_sign(ret_lane_layout.ty); + + for lane in 0..lane_count { + let lane = mir::Field::new(lane.try_into().unwrap()); + + let a_lane = a.value_field(fx, lane).load_scalar(fx); + let res = clif_int_or_float_cast(fx, a_lane, from_signed, ret_lane_ty, to_signed); + ret.place_field(fx, lane).write_cvalue(fx, CValue::by_val(res, ret_lane_layout)); + } + }; + + simd_eq, (c x, c y) { + simd_cmp!(fx, intrinsic, Equal(x, y) -> ret); + }; + simd_ne, (c x, c y) { + simd_cmp!(fx, intrinsic, NotEqual(x, y) -> ret); + }; + simd_lt, (c x, c y) { + simd_cmp!(fx, intrinsic, UnsignedLessThan|SignedLessThan(x, y) -> ret); + }; + simd_le, (c x, c y) { + simd_cmp!(fx, intrinsic, UnsignedLessThanOrEqual|SignedLessThanOrEqual(x, y) -> ret); + }; + simd_gt, (c x, c y) { + simd_cmp!(fx, intrinsic, UnsignedGreaterThan|SignedGreaterThan(x, y) -> ret); + }; + simd_ge, (c x, c y) { + simd_cmp!(fx, intrinsic, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual(x, y) -> ret); + }; + + // simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U + _ if intrinsic.starts_with("simd_shuffle"), (c x, c y, o idx) { + let n: u32 = intrinsic["simd_shuffle".len()..].parse().unwrap(); 
+ + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_type, lane_count) = lane_type_and_count(fx, layout, intrinsic); + let (ret_lane_type, ret_lane_count) = lane_type_and_count(fx, ret.layout(), intrinsic); + + assert_eq!(lane_type, ret_lane_type); + assert_eq!(n, ret_lane_count); + + let total_len = lane_count * 2; + + let indexes = { + use rustc::mir::interpret::*; + let idx_const = crate::constant::mir_operand_get_const_val(fx, idx).expect("simd_shuffle* idx not const"); + + let idx_bytes = match idx_const.val { + ty::ConstKind::Value(ConstValue::ByRef { alloc, offset }) => { + let ptr = Pointer::new(AllocId(0 /* dummy */), offset); + let size = Size::from_bytes(4 * u64::from(ret_lane_count) /* size_of([u32; ret_lane_count]) */); + alloc.get_bytes(fx, ptr, size).unwrap() + } + _ => unreachable!("{:?}", idx_const), + }; + + (0..ret_lane_count).map(|i| { + let i = usize::try_from(i).unwrap(); + let idx = rustc::mir::interpret::read_target_uint( + fx.tcx.data_layout.endian, + &idx_bytes[4*i.. 4*i + 4], + ).expect("read_target_uint"); + u32::try_from(idx).expect("try_from u32") + }).collect::<Vec<u32>>() + }; + + for &idx in &indexes { + assert!(idx < total_len, "idx {} out of range 0..{}", idx, total_len); + } + + for (out_idx, in_idx) in indexes.into_iter().enumerate() { + let in_lane = if in_idx < lane_count { + x.value_field(fx, mir::Field::new(in_idx.try_into().unwrap())) + } else { + y.value_field(fx, mir::Field::new((in_idx - lane_count).try_into().unwrap())) + }; + let out_lane = ret.place_field(fx, mir::Field::new(out_idx)); + out_lane.write_cvalue(fx, in_lane); + } + }; + + simd_extract, (c v, o idx) { + let idx_const = if let Some(idx_const) = crate::constant::mir_operand_get_const_val(fx, idx) { + idx_const + } else { + fx.tcx.sess.span_warn( + fx.mir.span, + "`#[rustc_arg_required_const(..)]` is not yet supported. 
Calling this function will panic.", + ); + crate::trap::trap_panic(fx, "`#[rustc_arg_required_const(..)]` is not yet supported."); + return; + }; + + let idx = idx_const.val.try_to_bits(Size::from_bytes(4 /* u32*/)).expect(&format!("kind not scalar: {:?}", idx_const)); + let (_lane_type, lane_count) = lane_type_and_count(fx, v.layout(), intrinsic); + if idx >= lane_count.into() { + fx.tcx.sess.span_fatal(fx.mir.span, &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count)); + } + + let ret_lane = v.value_field(fx, mir::Field::new(idx.try_into().unwrap())); + ret.write_cvalue(fx, ret_lane); + }; + + simd_add, (c x, c y) { + simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret); + }; + simd_sub, (c x, c y) { + simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret); + }; + simd_mul, (c x, c y) { + simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret); + }; + simd_div, (c x, c y) { + simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret); + }; + simd_shl, (c x, c y) { + simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret); + }; + simd_shr, (c x, c y) { + simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret); + }; + simd_and, (c x, c y) { + simd_int_binop!(fx, intrinsic, band(x, y) -> ret); + }; + simd_or, (c x, c y) { + simd_int_binop!(fx, intrinsic, bor(x, y) -> ret); + }; + simd_xor, (c x, c y) { + simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret); + }; + + simd_fmin, (c x, c y) { + simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret); + }; + simd_fmax, (c x, c y) { + simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret); + }; + } +}