rust/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs

pub mod llvm;
mod simd;

#[cfg(feature = "master")]
use std::iter;

#[cfg(feature = "master")]
use gccjit::Type;
use gccjit::{ComparisonOp, Function, FunctionType, RValue, ToRValue, UnaryOp};
use rustc_abi::{BackendRepr, HasDataLayout, WrappingRange};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::base::wants_msvc_seh;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::errors::InvalidMonomorphization;
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
#[cfg(feature = "master")]
use rustc_codegen_ssa::traits::MiscCodegenMethods;
use rustc_codegen_ssa::traits::{
    ArgAbiBuilderMethods, BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods,
    IntrinsicCallBuilderMethods, LayoutTypeCodegenMethods,
};
use rustc_data_structures::fx::FxHashSet;
#[cfg(feature = "master")]
use rustc_middle::ty::layout::FnAbiOf;
use rustc_middle::ty::layout::LayoutOf;
use rustc_middle::ty::{self, Instance, Ty};
use rustc_middle::{bug, span_bug};
use rustc_span::{Span, Symbol, sym};
use rustc_target::callconv::{ArgAbi, PassMode};

#[cfg(feature = "master")]
use crate::abi::FnAbiGccExt;
use crate::abi::GccType;
use crate::builder::Builder;
use crate::common::{SignType, TypeReflection};
use crate::context::CodegenCx;
use crate::intrinsic::simd::generic_simd_intrinsic;
use crate::type_of::LayoutGccExt;

/// Resolves a Rust intrinsic `name` to the GCC function that implements it directly.
///
/// Most intrinsics map to a GCC builtin fetched with `get_builtin_function`. The two
/// `f128` min/max intrinsics are instead declared as external two-operand functions.
/// Returns `None` when `name` is not one of the intrinsics listed here.
fn get_simple_intrinsic<'gcc, 'tcx>(
    cx: &CodegenCx<'gcc, 'tcx>,
    name: Symbol,
) -> Option<Function<'gcc>> {
    // Declares `extern f128 symbol(f128 a, f128 b)`.
    let declare_f128_binary = |symbol: &str| {
        let f128_type = cx.type_f128();
        let operands = [
            cx.context.new_parameter(None, f128_type, "a"),
            cx.context.new_parameter(None, f128_type, "b"),
        ];
        cx.context.new_function(None, FunctionType::Extern, f128_type, &operands, symbol, false)
    };

    let builtin_name = match name {
        // Square root and powers.
        sym::sqrtf32 => "sqrtf",
        sym::sqrtf64 => "sqrt",
        sym::powif32 => "__builtin_powif",
        sym::powif64 => "__builtin_powi",
        sym::powf32 => "powf",
        sym::powf64 => "pow",

        // Trigonometry.
        sym::sinf32 => "sinf",
        sym::sinf64 => "sin",
        sym::cosf32 => "cosf",
        sym::cosf64 => "cos",

        // Exponentials and logarithms.
        sym::expf32 => "expf",
        sym::expf64 => "exp",
        sym::exp2f32 => "exp2f",
        sym::exp2f64 => "exp2",
        sym::logf32 => "logf",
        sym::logf64 => "log",
        sym::log10f32 => "log10f",
        sym::log10f64 => "log10",
        sym::log2f32 => "log2f",
        sym::log2f64 => "log2",

        // Fused multiply-add.
        sym::fmaf32 => "fmaf",
        sym::fmaf64 => "fma",
        // FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation
        sym::fmuladdf32 => "fmaf", // FIXME: use gcc intrinsic analogous to llvm.fmuladd.f32
        sym::fmuladdf64 => "fma",  // FIXME: use gcc intrinsic analogous to llvm.fmuladd.f64

        // Minimum / maximum.
        sym::minimumf32 => "fminimumf",
        sym::minimumf64 => "fminimum",
        // GCC doesn't have the intrinsic we want so we use the compiler-builtins one
        // https://docs.rs/compiler_builtins/latest/compiler_builtins/math/full_availability/fn.fminimumf128.html
        sym::minimumf128 => return Some(declare_f128_binary("fminimumf128")),
        sym::maximumf32 => "fmaximumf",
        sym::maximumf64 => "fmaximum",
        // GCC doesn't have the intrinsic we want so we use the compiler-builtins one
        // https://docs.rs/compiler_builtins/latest/compiler_builtins/math/full_availability/fn.fmaximumf128.html
        sym::maximumf128 => return Some(declare_f128_binary("fmaximumf128")),

        // Sign manipulation and rounding.
        sym::copysignf32 => "copysignf",
        sym::copysignf64 => "copysign",
        sym::floorf32 => "floorf",
        sym::floorf64 => "floor",
        sym::ceilf32 => "ceilf",
        sym::ceilf64 => "ceil",
        sym::truncf32 => "truncf",
        sym::truncf64 => "trunc",
        // We match the LLVM backend and lower this to `rint`.
        sym::round_ties_even_f32 => "rintf",
        sym::round_ties_even_f64 => "rint",
        sym::roundf32 => "roundf",
        sym::roundf64 => "round",

        sym::abort => "abort",
        _ => return None,
    };
    Some(cx.context.get_builtin_function(builtin_name))
}

// FIXME(antoyo): We can probably remove these and use the fallback intrinsic implementation.
/// Declares the external two-operand function backing a floating-point
/// `minimum*`/`maximum*` intrinsic.
///
/// Both parameters (named `a` and `b`) and the return value share the operand type
/// selected by `name`. Returns `None` for any other intrinsic.
fn get_simple_function<'gcc, 'tcx>(
    cx: &CodegenCx<'gcc, 'tcx>,
    name: Symbol,
) -> Option<Function<'gcc>> {
    let (operand_type, func_name) = match name {
        sym::minimumf32 => (cx.float_type, "fminimumf"),
        sym::minimumf64 => (cx.double_type, "fminimum"),
        // GCC doesn't have the intrinsic we want so we use the compiler-builtins one
        // https://docs.rs/compiler_builtins/latest/compiler_builtins/math/full_availability/fn.fminimumf128.html
        sym::minimumf128 => (cx.type_f128(), "fminimumf128"),
        sym::maximumf32 => (cx.float_type, "fmaximumf"),
        sym::maximumf64 => (cx.double_type, "fmaximum"),
        // GCC doesn't have the intrinsic we want so we use the compiler-builtins one
        // https://docs.rs/compiler_builtins/latest/compiler_builtins/math/full_availability/fn.fmaximumf128.html
        sym::maximumf128 => (cx.type_f128(), "fmaximumf128"),
        _ => return None,
    };

    let operands = [
        cx.context.new_parameter(None, operand_type, "a"),
        cx.context.new_parameter(None, operand_type, "b"),
    ];
    let declaration = cx.context.new_function(
        None,
        FunctionType::Extern,
        operand_type,
        &operands,
        func_name,
        false,
    );
    Some(declaration)
}

/// Declares the external one-operand `f128` function that implements `name`.
///
/// Reports a compiler bug at `span` when `name` is not one of the unary `f128`
/// intrinsics handled here.
fn get_simple_function_f128<'gcc, 'tcx>(
    span: Span,
    cx: &CodegenCx<'gcc, 'tcx>,
    name: Symbol,
) -> Function<'gcc> {
    // Resolved before the name lookup, exactly as callers have always observed.
    let f128_type = cx.type_f128();
    let symbol = match name {
        sym::ceilf128 => "ceilf128",
        sym::fabs => "fabsf128",
        sym::floorf128 => "floorf128",
        sym::truncf128 => "truncf128",
        sym::roundf128 => "roundf128",
        sym::round_ties_even_f128 => "roundevenf128",
        sym::sqrtf128 => "sqrtf128",
        _ => span_bug!(span, "used get_simple_function_f128 for non-unary f128 intrinsic"),
    };
    let operand = cx.context.new_parameter(None, f128_type, "a");
    cx.context.new_function(None, FunctionType::Extern, f128_type, &[operand], symbol, false)
}

/// Emits an `f16` intrinsic by calling the corresponding `f32` builtin.
///
/// The operands are cast to `f32`, the builtin selected by `name` is called, and the
/// result is cast back to `f16`. For `powif16` only the first operand is cast; the
/// second one is passed through unchanged.
///
/// Hits `unreachable!()` if `name` is not one of the intrinsics listed in the match.
fn f16_builtin<'gcc, 'tcx>(
    cx: &CodegenCx<'gcc, 'tcx>,
    name: Symbol,
    args: &[OperandRef<'tcx, RValue<'gcc>>],
) -> RValue<'gcc> {
    let f32_type = cx.type_f32();
    let widen = |value: RValue<'gcc>| cx.context.new_cast(None, value, f32_type);
    let narrow = |value: RValue<'gcc>| cx.context.new_cast(None, value, cx.type_f16());

    let builtin_name = match name {
        sym::ceilf16 => "__builtin_ceilf",
        sym::copysignf16 => "__builtin_copysignf",
        sym::fabs => "fabsf",
        sym::floorf16 => "__builtin_floorf",
        sym::fmaf16 => "fmaf",
        sym::powf16 => "__builtin_powf",
        sym::powif16 => {
            let powi = cx.context.get_builtin_function("__builtin_powif");
            let base = widen(args[0].immediate());
            let exponent = args[1].immediate();
            let call = cx.context.new_call(None, powi, &[base, exponent]);
            return narrow(call);
        }
        sym::roundf16 => "__builtin_roundf",
        sym::round_ties_even_f16 => "__builtin_rintf",
        sym::sqrtf16 => "__builtin_sqrtf",
        sym::truncf16 => "__builtin_truncf",
        _ => unreachable!(),
    };

    let builtin = cx.context.get_builtin_function(builtin_name);
    let widened_args: Vec<_> = args.iter().map(|arg| widen(arg.immediate())).collect();
    let call = cx.context.new_call(None, builtin, &widened_args);
    narrow(call)
}

impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
    /// Lowers a call to the Rust intrinsic identified by `instance`.
    ///
    /// The intrinsic is dispatched on its item name. Arms either produce a value, which
    /// is stored into `result` after the match, or do their own stores and return early.
    ///
    /// Returns `Ok(())` when the intrinsic was handled here, including the paths that
    /// only emit a diagnostic. Returns `Err` with an instance built from the same
    /// `def_id` and args when no arm matches ("fall back to default body").
    fn codegen_intrinsic_call(
        &mut self,
        instance: Instance<'tcx>,
        args: &[OperandRef<'tcx, RValue<'gcc>>],
        result: PlaceRef<'tcx, RValue<'gcc>>,
        span: Span,
    ) -> Result<(), Instance<'tcx>> {
        let tcx = self.tcx;

        let name = tcx.item_name(instance.def_id());
        let name_str = name.as_str();
        let fn_args = instance.args;

        // Both lookups are performed eagerly; the first two match arms below consume them.
        let simple = get_simple_intrinsic(self, name);
        let simple_func = get_simple_function(self, name);

        let value = match name {
            // Intrinsics that map directly to a function: forward the immediates.
            _ if simple.is_some() => {
                let func = simple.expect("simple intrinsic function");
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }
            _ if simple_func.is_some() => {
                let func = simple_func.expect("simple function");
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }
            // `f16` intrinsics are computed through the `f32` builtins.
            sym::ceilf16
            | sym::copysignf16
            | sym::floorf16
            | sym::fmaf16
            | sym::powf16
            | sym::powif16
            | sym::roundf16
            | sym::round_ties_even_f16
            | sym::sqrtf16
            | sym::truncf16 => f16_builtin(self, name, args),
            // Unary `f128` intrinsics, only when the `f128` type is supported.
            sym::ceilf128
            | sym::floorf128
            | sym::truncf128
            | sym::roundf128
            | sym::round_ties_even_f128
            | sym::sqrtf128
                if self.cx.supports_f128_type =>
            {
                let func = get_simple_function_f128(span, self, name);
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }
            sym::copysignf128 if self.cx.supports_f128_type => {
                let f128_type = self.cx.type_f128();
                let func = self.cx.context.new_function(
                    None,
                    FunctionType::Extern,
                    f128_type,
                    &[
                        self.cx.context.new_parameter(None, f128_type, "a"),
                        self.cx.context.new_parameter(None, f128_type, "b"),
                    ],
                    "copysignf128",
                    false,
                );
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }
            // NOTE(review): unlike the arms above, `fmaf128` and `powif128` are not
            // guarded by `supports_f128_type` — confirm this is intended.
            sym::fmaf128 => {
                let f128_type = self.cx.type_f128();
                let func = self.cx.context.new_function(
                    None,
                    FunctionType::Extern,
                    f128_type,
                    &[
                        self.cx.context.new_parameter(None, f128_type, "a"),
                        self.cx.context.new_parameter(None, f128_type, "b"),
                        self.cx.context.new_parameter(None, f128_type, "c"),
                    ],
                    "fmaf128",
                    false,
                );
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }
            sym::powif128 => {
                let f128_type = self.cx.type_f128();
                let func = self.cx.context.new_function(
                    None,
                    FunctionType::Extern,
                    f128_type,
                    &[
                        self.cx.context.new_parameter(None, f128_type, "a"),
                        self.cx.context.new_parameter(None, self.int_type, "b"),
                    ],
                    "__powitf2",
                    false,
                );
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }
            sym::is_val_statically_known => {
                let a = args[0].immediate();
                let builtin = self.context.get_builtin_function("__builtin_constant_p");
                let res = self.context.new_call(None, builtin, &[a]);
                // NOTE(review): the result is `true` when the builtin returned 0. GCC
                // documents `__builtin_constant_p` as returning 1 for known constants,
                // so this comparison looks inverted — confirm.
                self.icmp(IntPredicate::IntEQ, res, self.const_i32(0))
            }
            sym::catch_unwind => {
                // `try_intrinsic` is handed `result` directly, so skip the common store.
                try_intrinsic(
                    self,
                    args[0].immediate(),
                    args[1].immediate(),
                    args[2].immediate(),
                    result,
                );
                return Ok(());
            }
            sym::breakpoint => {
                unimplemented!();
            }
            sym::va_arg => {
                unimplemented!();
            }

            sym::volatile_load | sym::unaligned_volatile_load => {
                let ptr = args[0].immediate();
                let load = self.volatile_load(result.layout.gcc_type(self), ptr);
                // FIXME(antoyo): set alignment.
                if let BackendRepr::Scalar(scalar) = result.layout.backend_repr {
                    self.to_immediate_scalar(load, scalar)
                } else {
                    load
                }
            }
            sym::volatile_store => {
                let dst = args[0].deref(self.cx());
                args[1].val.volatile_store(self, dst);
                return Ok(());
            }
            sym::unaligned_volatile_store => {
                let dst = args[0].deref(self.cx());
                args[1].val.unaligned_volatile_store(self, dst);
                return Ok(());
            }
            sym::prefetch_read_data
            | sym::prefetch_write_data
            | sym::prefetch_read_instruction
            | sym::prefetch_write_instruction => {
                unimplemented!();
            }
            // Integer bit-manipulation and saturating arithmetic. The width and
            // signedness come from the type of the first argument.
            sym::ctlz
            | sym::ctlz_nonzero
            | sym::cttz
            | sym::cttz_nonzero
            | sym::ctpop
            | sym::bswap
            | sym::bitreverse
            | sym::rotate_left
            | sym::rotate_right
            | sym::saturating_add
            | sym::saturating_sub => {
                match int_type_width_signed(args[0].layout.ty, self) {
                    Some((width, signed)) => match name {
                        sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),

                        sym::ctlz_nonzero => {
                            self.count_leading_zeroes_nonzero(width, args[0].immediate())
                        }
                        sym::cttz => self.count_trailing_zeroes(width, args[0].immediate()),
                        sym::cttz_nonzero => {
                            self.count_trailing_zeroes_nonzero(width, args[0].immediate())
                        }
                        sym::ctpop => self.pop_count(args[0].immediate()),
                        sym::bswap => {
                            if width == 8 {
                                args[0].immediate() // byte swap a u8/i8 is just a no-op
                            } else {
                                self.gcc_bswap(args[0].immediate(), width)
                            }
                        }
                        sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
                        sym::rotate_left | sym::rotate_right => {
                            // Using optimized branchless algorithm from:
                            // https://blog.regehr.org/archives/1063
                            // This implementation uses the pattern (x<<n) | (x>>(-n&(width-1)))
                            // which generates efficient code for other platforms.
                            let is_left = name == sym::rotate_left;
                            let val = args[0].immediate();
                            let raw_shift = args[1].immediate();
                            if is_left {
                                self.rotate_left(val, raw_shift, width)
                            } else {
                                self.rotate_right(val, raw_shift, width)
                            }
                        }
                        sym::saturating_add => self.saturating_add(
                            args[0].immediate(),
                            args[1].immediate(),
                            signed,
                            width,
                        ),
                        sym::saturating_sub => self.saturating_sub(
                            args[0].immediate(),
                            args[1].immediate(),
                            signed,
                            width,
                        ),
                        // The outer arm only admits the names handled above.
                        _ => bug!(),
                    },
                    None => {
                        // Not a basic integer type: report it and emit nothing.
                        tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
                            span,
                            name,
                            ty: args[0].layout.ty,
                        });
                        return Ok(());
                    }
                }
            }
            // `fabs` is a single intrinsic for every float width; select by operand type.
            sym::fabs => 'fabs: {
                let ty = args[0].layout.ty;
                let ty::Float(float_ty) = *ty.kind() else {
                    span_bug!(span, "expected float type for fabs intrinsic: {:?}", ty);
                };
                let func = match float_ty {
                    ty::FloatTy::F16 => break 'fabs f16_builtin(self, name, args),
                    ty::FloatTy::F32 => self.context.get_builtin_function("fabsf"),
                    ty::FloatTy::F64 => self.context.get_builtin_function("fabs"),
                    ty::FloatTy::F128 => get_simple_function_f128(span, self, name),
                };
                self.cx.context.new_call(
                    self.location,
                    func,
                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
                )
            }

            sym::raw_eq => {
                use rustc_abi::BackendRepr::*;
                let tp_ty = fn_args.type_at(0);
                let layout = self.layout_of(tp_ty).layout;
                // Currently unused: it only feeds the commented-out integer-compare path below.
                let _use_integer_compare = match layout.backend_repr() {
                    Scalar(_) | ScalarPair(_, _) => true,
                    SimdVector { .. } | SimdScalableVector { .. } => false,
                    Memory { .. } => {
                        // For rusty ABIs, small aggregates are actually passed
                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
                        // so we re-use that same threshold here.
                        layout.size() <= self.data_layout().pointer_size() * 2
                    }
                };

                let a = args[0].immediate();
                let b = args[1].immediate();
                if layout.size().bytes() == 0 {
                    // Zero-sized values always compare equal.
                    self.const_bool(true)
                }
                /*else if use_integer_compare {
                    let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
                    let ptr_ty = self.type_ptr_to(integer_ty);
                    let a_ptr = self.bitcast(a, ptr_ty);
                    let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
                    let b_ptr = self.bitcast(b, ptr_ty);
                    let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
                }*/
                else {
                    // Compare `layout.size()` bytes with `memcmp`; equal iff it returns 0.
                    let void_ptr_type = self.context.new_type::<*const ()>();
                    let a_ptr = self.bitcast(a, void_ptr_type);
                    let b_ptr = self.bitcast(b, void_ptr_type);
                    let n = self.context.new_cast(
                        None,
                        self.const_usize(layout.size().bytes()),
                        self.sizet_type,
                    );
                    let builtin = self.context.get_builtin_function("memcmp");
                    let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
                }
            }

            sym::compare_bytes => {
                let a = args[0].immediate();
                let b = args[1].immediate();
                let n = args[2].immediate();

                let void_ptr_type = self.context.new_type::<*const ()>();
                let a_ptr = self.bitcast(a, void_ptr_type);
                let b_ptr = self.bitcast(b, void_ptr_type);

                // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
                let builtin = self.context.get_builtin_function("memcmp");
                let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
                // Sign-extend the `memcmp` result to a 32-bit integer.
                self.sext(cmp, self.type_ix(32))
            }

            sym::black_box => {
                args[0].val.store(self, result);

                // Empty volatile asm that takes the result's address as an input and
                // declares a "memory" clobber.
                let block = self.llbb();
                let extended_asm = block.add_extended_asm(None, "");
                extended_asm.add_input_operand(None, "r", result.val.llval);
                extended_asm.add_clobber("memory");
                extended_asm.set_volatile_flag(true);

                // We have copied the value to `result` already.
                return Ok(());
            }

            sym::ptr_mask => {
                let usize_type = self.context.new_type::<usize>();
                let void_ptr_type = self.context.new_type::<*const ()>();

                let ptr = args[0].immediate();
                let mask = args[1].immediate();

                // Pointer -> integer, apply the mask, integer -> pointer.
                let addr = self.bitcast(ptr, usize_type);
                let masked = self.and(addr, mask);
                self.bitcast(masked, void_ptr_type)
            }

            _ if name_str.starts_with("simd_") => {
                match generic_simd_intrinsic(
                    self,
                    name,
                    args,
                    result.layout.ty,
                    result.layout.gcc_type(self),
                    span,
                ) {
                    Ok(value) => value,
                    // Nothing is stored on failure.
                    // NOTE(review): presumably the helper has already reported the
                    // error — confirm.
                    Err(()) => return Ok(()),
                }
            }

            // Fall back to default body
            _ => return Err(Instance::new_raw(instance.def_id(), instance.args)),
        };

        // Common store path: `bool` results are converted with `from_immediate` first,
        // and unit results are not stored at all.
        if result.layout.ty.is_bool() {
            let val = self.from_immediate(value);
            self.store_to_place(val, result.val);
        } else if !result.layout.ty.is_unit() {
            self.store_to_place(value, result.val);
        }
        Ok(())
    }

    /// Emits a call to the LLVM-named intrinsic that `instance` links to.
    ///
    /// The callee is looked up first per instance, then per symbol name. On a miss it is
    /// created, registered in both caches, and given its function attributes. The
    /// operands are then flattened into call arguments and the call is emitted. Cleanup
    /// call-site attributes are applied when `is_cleanup` is set.
    fn codegen_llvm_intrinsic_call(
        &mut self,
        instance: ty::Instance<'tcx>,
        args: &[OperandRef<'tcx, Self::Value>],
        is_cleanup: bool,
    ) -> Self::Value {
        // Copy the cached entry out so the `RefCell` borrow ends before any insertion.
        let cached_for_instance = self.intrinsic_instances.borrow().get(&instance).copied();
        let func = match cached_for_instance {
            Some(func) => func,
            None => {
                let sym = self.tcx.symbol_name(instance).name;

                let cached_for_symbol = self.intrinsics.borrow().get(sym).copied();
                let func = match cached_for_symbol {
                    Some(func) => func,
                    None => {
                        self.linkage.set(FunctionType::Extern);

                        let func = if sym == "llvm.fma.f16" {
                            // fma is not a target builtin, but a normal builtin, so we
                            // handle it differently here.
                            self.context.get_builtin_function("fma")
                        } else {
                            llvm::intrinsic(sym, self)
                        };

                        self.intrinsics.borrow_mut().insert(sym.to_string(), func);
                        self.on_stack_function_params
                            .borrow_mut()
                            .insert(func, FxHashSet::default());
                        crate::attributes::from_fn_attrs(self, func, instance);

                        func
                    }
                };

                self.intrinsic_instances.borrow_mut().insert(instance, func);
                func
            }
        };

        let fn_ptr = func.get_address(None);
        let fn_ty = fn_ptr.get_type();

        let mut call_args = Vec::new();
        for arg in args {
            match arg.val {
                // Zero-sized operands contribute no argument.
                OperandValue::ZeroSized => {}
                OperandValue::Immediate(_) => call_args.push(arg.immediate()),
                OperandValue::Pair(first, second) => {
                    call_args.push(first);
                    call_args.push(second);
                }
                OperandValue::Ref(op_place_val) => {
                    // We can't use `PlaceRef::load` here because the argument
                    // may have a type we don't treat as immediate, but the ABI
                    // used for this call is passing it by-value. In that case,
                    // the load would just produce `OperandValue::Ref` instead
                    // of the `OperandValue::Immediate` we need for the call.
                    let loaded = self.load(
                        self.backend_type(arg.layout),
                        op_place_val.llval,
                        op_place_val.align,
                    );
                    let by_value = match arg.layout.backend_repr {
                        BackendRepr::Scalar(scalar) => {
                            if scalar.is_bool() {
                                self.range_metadata(loaded, WrappingRange { start: 0, end: 1 });
                            }
                            // We store bools as `i8` so we need to truncate to `i1`.
                            self.to_immediate_scalar(loaded, scalar)
                        }
                        _ => loaded,
                    };
                    call_args.push(by_value);
                }
            }
        }

        // FIXME directly use the llvm intrinsic adjustment functions here
        let call_result = self.call(fn_ty, None, None, fn_ptr, &call_args, None, None);
        if is_cleanup {
            self.apply_attrs_to_cleanup_callsite(call_result);
        }

        call_result
    }

    /// Emits a call to the `abort` builtin.
    fn abort(&mut self) {
        let abort_fn = self.context.get_builtin_function("abort");
        // NOTE(review): the `Function` handle is reinterpreted as an `RValue` so it can
        // be passed as the callee to `call`. This presumably relies on both handles
        // having the same representation — confirm against gccjit.
        let callee: RValue<'gcc> = unsafe { std::mem::transmute(abort_fn) };
        let void_type = self.type_void();
        self.call(void_type, None, None, callee, &[], None, None);
    }

    /// Records that `value` may be assumed to hold.
    ///
    /// This currently only forwards to `expect(value, true)` and discards the result.
    fn assume(&mut self, value: Self::Value) {
        // FIXME(antoyo): switch to assume when it exists.
        // Or use something like this:
        // #define __assume(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
        let _ = self.expect(value, true);
    }

    /// Branch-prediction hint for `cond`.
    ///
    /// Not implemented yet: `_expected` is ignored and `cond` is returned unchanged.
    fn expect(&mut self, cond: Self::Value, _expected: bool) -> Self::Value {
        // FIXME(antoyo)
        cond
    }

    /// Unsupported: ignores all arguments and returns the constant `0` of `int_type`.
    fn type_checked_load(
        &mut self,
        _vtable: Self::Value,
        _vtable_byte_offset: u64,
        _typeid: &[u8],
    ) -> Self::Value {
        // Unsupported.
        let int_type = self.int_type;
        self.context.new_rvalue_from_int(int_type, 0)
    }

    /// Not implemented: always panics via `unimplemented!()`.
    fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
        unimplemented!()
    }

    /// Placeholder: ignores `_va_list` and returns the constant `0` of `int_type`.
    fn va_end(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
        // FIXME(antoyo): implement.
        let int_type = self.int_type;
        self.context.new_rvalue_from_int(int_type, 0)
    }
}

impl<'a, 'gcc, 'tcx> ArgAbiBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
    fn store_fn_arg(
        &mut self,
        arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
        idx: &mut usize,
        dst: PlaceRef<'tcx, Self::Value>,
    ) {
        arg_abi.store_fn_arg(self, idx, dst)
    }

    fn store_arg(
        &mut self,
        arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
        val: RValue<'gcc>,
        dst: PlaceRef<'tcx, RValue<'gcc>>,
    ) {
        arg_abi.store(self, val, dst)
    }
}

/// GCC-backend operations for storing values described by an argument ABI.
pub trait ArgAbiExt<'gcc, 'tcx> {
    /// Stores `val` into the place `dst`, emitting code through `bx`.
    fn store(
        &self,
        bx: &mut Builder<'_, 'gcc, 'tcx>,
        val: RValue<'gcc>,
        dst: PlaceRef<'tcx, RValue<'gcc>>,
    );
    /// Stores a function argument into `dst`, emitting code through `bx`.
    ///
    /// `idx` is a mutable parameter cursor that implementations may advance.
    fn store_fn_arg(
        &self,
        bx: &mut Builder<'_, 'gcc, 'tcx>,
        idx: &mut usize,
        dst: PlaceRef<'tcx, RValue<'gcc>>,
    );
}

impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
    /// Writes `val`, passed as described by this `ArgAbi`, into the place that holds
    /// the original Rust type of the argument or return value.
    ///
    /// This serves both for spilling formal arguments into Rust variables and for
    /// writing call results to their destination. Ignored arguments store nothing.
    /// Unsized indirect arguments are a compiler bug here.
    fn store(
        &self,
        bx: &mut Builder<'_, 'gcc, 'tcx>,
        val: RValue<'gcc>,
        dst: PlaceRef<'tcx, RValue<'gcc>>,
    ) {
        if self.is_ignore() {
            return;
        }
        if self.is_sized_indirect() {
            // `val` is a pointer to the data: copy from that place.
            let source = PlaceValue::new_sized(val, self.layout.align.abi);
            OperandValue::Ref(source).store(bx, dst);
            return;
        }
        if self.is_unsized_indirect() {
            bug!("unsized `ArgAbi` cannot be stored");
        }
        let PassMode::Cast { ref cast, .. } = self.mode else {
            // Plain direct value.
            OperandValue::Immediate(val).store(bx, dst);
            return;
        };

        // FIXME(eddyb): Figure out when the simpler Store is safe, clang
        // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
        let can_store_through_cast_ptr = false;
        if can_store_through_cast_ptr {
            let cast_ptr_type = bx.type_ptr_to(cast.gcc_type(bx));
            let cast_dst = bx.pointercast(dst.val.llval, cast_ptr_type);
            bx.store(val, cast_dst, self.layout.align.abi);
            return;
        }

        // The actual return type is a struct, but the ABI
        // adaptation code has cast it into some scalar type.  The
        // code that follows is the only reliable way I have
        // found to do a transform like i64 -> {i32,i32}.
        // Basically we dump the data onto the stack then memcpy it.
        //
        // Other approaches I tried:
        // - Casting rust ret pointer to the foreign type and using Store
        //   is (a) unsafe if size of foreign type > size of rust type and
        //   (b) runs afoul of strict aliasing rules, yielding invalid
        //   assembly under -O (specifically, the store gets removed).
        // - Truncating foreign type to correct integral type and then
        //   bitcasting to the struct type yields invalid cast errors.

        // Step 1: reserve scratch space sized and aligned for the cast type.
        let scratch_size = cast.size(bx);
        let scratch_align = cast.align(bx);
        let scratch_slot = bx.alloca(scratch_size, scratch_align);
        bx.lifetime_start(scratch_slot, scratch_size);

        // Step 2: write the cast value into the scratch space.
        rustc_codegen_ssa::mir::store_cast(bx, cast, val, scratch_slot, scratch_align);

        // Step 3: copy exactly the Rust type's size into the real destination.
        let byte_count = bx.const_usize(self.layout.size.bytes());
        bx.memcpy(
            dst.val.llval,
            self.layout.align.abi,
            scratch_slot,
            scratch_align,
            byte_count,
            MemFlags::empty(),
            None,
        );

        bx.lifetime_end(scratch_slot, scratch_size);
    }

    /// Stores the formal parameter(s) of the current function, starting at index
    /// `*idx`, into `dst`.
    ///
    /// `*idx` is advanced by one for every parameter consumed: two for pairs, one for
    /// direct, sized-indirect and cast arguments, none for ignored ones. Unsized
    /// indirect arguments are a compiler bug here.
    fn store_fn_arg<'a>(
        &self,
        bx: &mut Builder<'a, 'gcc, 'tcx>,
        idx: &mut usize,
        dst: PlaceRef<'tcx, RValue<'gcc>>,
    ) {
        // Fetches the parameter at the cursor and moves the cursor forward.
        let mut take_param = || {
            let param = bx.current_func().get_param(*idx as i32);
            *idx += 1;
            param.to_rvalue()
        };
        match self.mode {
            PassMode::Ignore => {}
            PassMode::Pair(..) => {
                let first = take_param();
                let second = take_param();
                OperandValue::Pair(first, second).store(bx, dst);
            }
            PassMode::Indirect { meta_attrs: Some(_), .. } => {
                bug!("unsized `ArgAbi` cannot be stored");
            }
            PassMode::Direct(_)
            | PassMode::Indirect { meta_attrs: None, .. }
            | PassMode::Cast { .. } => {
                let param = take_param();
                self.store(bx, param, dst);
            }
        }
    }
}

/// Returns the bit width and signedness of a primitive integer type.
///
/// Yields `Some((width_in_bits, is_signed))` for `ty::Int` and `ty::Uint` types and `None`
/// for every other kind of type. `isize` and `usize` take their width from the session
/// target's `pointer_width`.
fn int_type_width_signed<'gcc, 'tcx>(
    ty: Ty<'tcx>,
    cx: &CodegenCx<'gcc, 'tcx>,
) -> Option<(u64, bool)> {
    use rustc_middle::ty::{IntTy, UintTy};

    // Only the pointer-sized variants depend on the compilation target.
    let pointer_bits = || u64::from(cx.tcx.sess.target.pointer_width);

    let (bits, is_signed) = match *ty.kind() {
        ty::Int(int_ty) => {
            let bits = match int_ty {
                IntTy::Isize => pointer_bits(),
                IntTy::I8 => 8,
                IntTy::I16 => 16,
                IntTy::I32 => 32,
                IntTy::I64 => 64,
                IntTy::I128 => 128,
            };
            (bits, true)
        }
        ty::Uint(uint_ty) => {
            let bits = match uint_ty {
                UintTy::Usize => pointer_bits(),
                UintTy::U8 => 8,
                UintTy::U16 => 16,
                UintTy::U32 => 32,
                UintTy::U64 => 64,
                UintTy::U128 => 128,
            };
            (bits, false)
        }
        _ => return None,
    };
    Some((bits, is_signed))
}

impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
    /// Emits code that reverses the order of the bits of `value`, which is `width` bits wide.
    ///
    /// Widths of 8, 16, 32 and 64 bits swap adjacent bits, then bit pairs, then nibbles, and
    /// finally (for anything wider than a byte) byte-swap the result. A 128-bit value is
    /// split into two 64-bit halves which are reversed separately and exchanged.
    ///
    /// The result is cast back to the type of `value`. Panics for any other `width`.
    fn bit_reverse(&mut self, width: u64, value: RValue<'gcc>) -> RValue<'gcc> {
        let result_type = value.get_type();
        // The shifts below are done on the unsigned counterpart of the input type.
        let unsigned_type = result_type.to_unsigned(self.cx);
        let value = if result_type.is_signed(self.cx) {
            self.gcc_int_cast(value, unsigned_type)
        } else {
            value
        };

        let reversed = match width {
            8 | 16 | 32 | 64 => {
                let context = &self.cx.context;
                let width_mask = ((1u128 << width) - 1) as u64;
                // Builds a constant from a repeating 64-bit pattern truncated to `width`
                // bits; values that fit in 16 bits go through the `int` constructor.
                let constant = |pattern: u64| {
                    let bits = pattern & width_mask;
                    if width > 16 {
                        context.new_rvalue_from_long(unsigned_type, bits as i64)
                    } else {
                        context.new_rvalue_from_int(unsigned_type, bits as i32)
                    }
                };
                let every_other_bit = constant(0x5555555555555555);
                let every_other_pair = constant(0x3333333333333333);
                let every_other_nibble = constant(0x0f0f0f0f0f0f0f0f);
                let one = context.new_rvalue_from_int(unsigned_type, 1);
                let two = context.new_rvalue_from_int(unsigned_type, 2);
                let four = context.new_rvalue_from_int(unsigned_type, 4);

                // Each round exchanges neighbouring groups of `shift` bits:
                // ((x >> shift) & mask) | ((x & mask) << shift).
                let mut current = value;
                let rounds =
                    [(one, every_other_bit), (two, every_other_pair), (four, every_other_nibble)];
                for (shift, mask) in rounds {
                    let moved_down = self.lshr(current, shift);
                    let moved_down = self.and(moved_down, mask);
                    let kept = self.and(current, mask);
                    let moved_up = self.shl(kept, shift);
                    current = self.or(moved_down, moved_up);
                }

                // The bits inside every byte are reversed now; reverse the byte order too.
                if width == 8 { current } else { self.gcc_bswap(current, width) }
            }
            128 => {
                // FIXME(antoyo): find a more efficient implementation?
                let sixty_four = self.gcc_int(unsigned_type, 64);
                let shifted = self.gcc_lshr(value, sixty_four);
                let high = self.gcc_int_cast(shifted, self.u64_type);
                let low = self.gcc_int_cast(value, self.u64_type);

                let reversed_high = self.bit_reverse(64, high);
                let reversed_low = self.bit_reverse(64, low);

                // The reversed high half becomes the low half and vice versa.
                let new_low = self.gcc_int_cast(reversed_high, unsigned_type);
                let widened_low = self.gcc_int_cast(reversed_low, unsigned_type);
                let new_high = self.shl(widened_low, sixty_four);

                self.gcc_or(new_low, new_high, self.location)
            }
            _ => panic!("cannot bit reverse with width = {}", width),
        };

        self.gcc_int_cast(reversed, result_type)
    }

    /// Emits code counting the leading (`count_leading == true`) or trailing
    /// (`count_leading == false`) zero bits of `arg`, where `arg` is allowed to be zero.
    ///
    /// A branch is generated: when `arg` is zero the result is `width`, otherwise the count
    /// is delegated to `count_zeroes_nonzero`. The result is read from a fresh `u32` local,
    /// and the builder is left positioned in the block that follows the branch.
    fn count_zeroes(&mut self, width: u64, arg: RValue<'gcc>, count_leading: bool) -> RValue<'gcc> {
        let func = self.current_func();
        let zero_case = func.new_block("then");
        let nonzero_case = func.new_block("else");
        let join = func.new_block("after");

        let result = func.new_local(None, self.u32_type, "zeros");
        let zero = self.cx.gcc_zero(arg.get_type());
        let is_zero = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
        self.llbb().end_with_conditional(None, is_zero, zero_case, nonzero_case);

        // Every bit of a zero value is a zero bit, so the count is the full width.
        let all_bits = self.cx.gcc_uint(self.u32_type, width);
        zero_case.add_assignment(None, result, all_bits);
        zero_case.end_with_jump(None, join);

        // NOTE: the nonzero helper emits into the builder's current block, so the builder
        // has to be moved into the block we just branched to before calling it.
        self.switch_to_block(nonzero_case);
        let counted = self.count_zeroes_nonzero(width, arg, count_leading);

        // The helper may itself have switched blocks; finish whichever one is current.
        let current = self.llbb();
        current.add_assignment(None, result, counted);
        current.end_with_jump(None, join);

        // NOTE: since jumps were added in a place rustc does not expect, the current block
        // in the builder state needs to be updated.
        self.switch_to_block(join);

        result.to_rvalue()
    }

    /// Emits code counting the leading (`count_leading == true`) or trailing zero bits of
    /// `arg`, which is `width` bits wide. The result is a `u32` value.
    ///
    /// Pre-condition: `arg` is guaranteed to not be 0 by the caller; the GCC builtins used
    /// here are undefined for a zero operand.
    ///
    /// Dispatch is on the (unsigned) GCC type of `arg`:
    /// - `unsigned char`/`short`/`int` use `__builtin_clz`/`__builtin_ctz`,
    /// - `unsigned long` uses `__builtin_clzl`/`__builtin_ctzl`,
    /// - `unsigned long long` uses `__builtin_clzll`/`__builtin_ctzll`,
    /// - 128-bit values are split into two 64-bit halves,
    /// - anything else is widened to `unsigned long long` and the count is corrected for the
    ///   extra bits.
    fn count_zeroes_nonzero(
        &mut self,
        width: u64,
        arg: RValue<'gcc>,
        count_leading: bool,
    ) -> RValue<'gcc> {
        // Calls `builtin` on `arg`, first casting it to `expected_type` (the parameter type
        // of the builtin) when it differs from `arg_type`, and casts the result to `u32`.
        fn use_builtin_function<'a, 'gcc, 'tcx>(
            builder: &mut Builder<'a, 'gcc, 'tcx>,
            builtin: &str,
            arg: RValue<'gcc>,
            arg_type: gccjit::Type<'gcc>,
            expected_type: gccjit::Type<'gcc>,
        ) -> RValue<'gcc> {
            let arg = if arg_type != expected_type {
                builder.context.new_cast(builder.location, arg, expected_type)
            } else {
                arg
            };
            let builtin = builder.context.get_builtin_function(builtin);
            let res = builder.context.new_call(builder.location, builtin, &[arg]);
            builder.context.new_cast(builder.location, res, builder.u32_type)
        }

        // FIXME(antoyo): use width?
        let result_type = self.u32_type;
        let mut arg_type = arg.get_type();
        // The builtins take unsigned operands.
        let arg = if arg_type.is_signed(self.cx) {
            arg_type = arg_type.to_unsigned(self.cx);
            self.gcc_int_cast(arg, arg_type)
        } else {
            arg
        };
        // FIXME(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
        // instead of using is_uint().
        if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) {
            // NOTE(review): for `unsigned char`/`unsigned short` operands, `__builtin_clz` on
            // the value widened to `unsigned int` also counts the widening bits; confirm
            // that narrow operands never reach this branch when counting leading zeroes.
            let builtin = if count_leading { "__builtin_clz" } else { "__builtin_ctz" };
            use_builtin_function(self, builtin, arg, arg_type, self.cx.uint_type)
        } else if arg_type.is_ulong(self.cx) {
            // The `l` builtins take an `unsigned long`: the operand must not be narrowed to
            // `unsigned int`, which would drop its high bits (and could make it zero).
            let builtin = if count_leading { "__builtin_clzl" } else { "__builtin_ctzl" };
            use_builtin_function(self, builtin, arg, arg_type, self.cx.ulong_type)
        } else if arg_type.is_ulonglong(self.cx) {
            // Likewise, the `ll` builtins take an `unsigned long long`.
            let builtin = if count_leading { "__builtin_clzll" } else { "__builtin_ctzll" };
            use_builtin_function(self, builtin, arg, arg_type, self.cx.ulonglong_type)
        } else if width == 128 {
            // arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0
            // __builtin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0,
            // else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0
            // because arg is not 0.
            // __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0,
            // else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0
            // because arg is not 0.

            let result = self.current_func().new_local(None, result_type, "count_zeroes_results");

            let cz_then_block = self.current_func().new_block("cz_then");
            let cz_else_block = self.current_func().new_block("cz_else");
            let cz_after_block = self.current_func().new_block("cz_after");

            let low = self.gcc_int_cast(arg, self.u64_type);
            let sixty_four = self.const_uint(arg_type, 64);
            let shift = self.lshr(arg, sixty_four);
            let high = self.gcc_int_cast(shift, self.u64_type);

            // `second` is the half that is inspected first (high half for leading zeroes,
            // low half for trailing zeroes); `first` is the fallback half.
            let (first, second, builtin) = if count_leading {
                (low, high, self.context.get_builtin_function("__builtin_clzll"))
            } else {
                (high, low, self.context.get_builtin_function("__builtin_ctzll"))
            };

            let zero_64 = self.const_uint(self.u64_type, 0);
            let cond = self.gcc_icmp(IntPredicate::IntNE, second, zero_64);
            self.llbb().end_with_conditional(self.location, cond, cz_then_block, cz_else_block);
            self.switch_to_block(cz_then_block);

            // The inspected half is nonzero: its count is the final answer.
            let result_128 =
                self.gcc_int_cast(self.context.new_call(None, builtin, &[second]), result_type);

            cz_then_block.add_assignment(self.location, result, result_128);
            cz_then_block.end_with_jump(self.location, cz_after_block);

            // The inspected half is all zeroes: count in the other half and add 64.
            self.switch_to_block(cz_else_block);
            let count_more_zeroes =
                self.gcc_int_cast(self.context.new_call(None, builtin, &[first]), result_type);
            let sixty_four_result_type = self.const_uint(result_type, 64);
            let count_result_type = self.add(count_more_zeroes, sixty_four_result_type);
            cz_else_block.add_assignment(self.location, result, count_result_type);
            cz_else_block.end_with_jump(self.location, cz_after_block);
            self.switch_to_block(cz_after_block);
            result.to_rvalue()
        } else {
            // Widen to `unsigned long long`; `diff` is the number of bits added by widening.
            let byte_diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
            let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8);
            let ull_arg = self.context.new_cast(self.location, arg, self.ulonglong_type);

            let res = if count_leading {
                // The widening bits are all leading zeroes, so remove them from the count.
                let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
                self.context.new_call(self.location, count_leading_zeroes, &[ull_arg]) - diff
            } else {
                let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll");
                let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set.
                let masked = mask
                    & self.context.new_unary_op(
                        self.location,
                        UnaryOp::BitwiseNegate,
                        arg_type,
                        arg,
                    );
                // `cond` is true only when `!arg` has every bit set, i.e. when `arg` is zero;
                // only then is `diff` subtracted from the count.
                let cond =
                    self.context.new_comparison(self.location, ComparisonOp::Equals, masked, mask);
                let diff = diff * self.context.new_cast(self.location, cond, self.int_type);

                self.context.new_call(self.location, count_trailing_zeroes, &[ull_arg]) - diff
            };
            self.context.new_cast(self.location, res, result_type)
        }
    }

    /// Emits code counting the leading zero bits of `arg` (`width` bits wide); `arg` may
    /// be zero.
    fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        let count_leading = true;
        self.count_zeroes(width, arg, count_leading)
    }

    /// Emits code counting the leading zero bits of `arg` (`width` bits wide).
    ///
    /// Pre-condition: the caller guarantees `arg` is not 0; otherwise
    /// `count_leading_zeroes` should be used.
    fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        let count_leading = true;
        self.count_zeroes_nonzero(width, arg, count_leading)
    }

    /// Emits code counting the trailing zero bits of `arg` (`width` bits wide); `arg` may
    /// be zero.
    fn count_trailing_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        let count_leading = false;
        self.count_zeroes(width, arg, count_leading)
    }

    /// Emits code counting the trailing zero bits of `arg` (`width` bits wide).
    ///
    /// Pre-condition: the caller guarantees `arg` is not 0; otherwise
    /// `count_trailing_zeroes` should be used.
    fn count_trailing_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        let count_leading = false;
        self.count_zeroes_nonzero(width, arg, count_leading)
    }

    /// Emits code computing the number of set bits of `value`, as a `u32` value.
    ///
    /// Signed inputs are first cast to their unsigned counterpart. When 128-bit integers
    /// are not natively supported, a 128-bit input is split into two halves that are
    /// counted separately. Otherwise a loop is generated that clears the lowest set bit
    /// until the value is zero, counting the iterations; the builder is left positioned in
    /// the block following that loop.
    fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> {
        // FIXME(antoyo): use the optimized version with fewer operations.
        let result_type = self.u32_type;
        let input_type = value.get_type();
        let unsigned_type = input_type.to_unsigned(self.cx);

        let value = if input_type.is_signed(self.cx) {
            self.gcc_int_cast(value, unsigned_type)
        } else {
            value
        };

        // only break apart 128-bit ints if they're not natively supported
        // FIXME(antoyo): remove this if/when native 128-bit integers land in libgccjit
        let needs_split = unsigned_type.is_u128(self.cx) && !self.cx.supports_128bit_integers;
        if needs_split {
            let sixty_four = self.gcc_int(unsigned_type, 64);
            let shifted = self.gcc_lshr(value, sixty_four);
            let upper = self.gcc_int_cast(shifted, self.cx.ulonglong_type);
            let upper_count = self.pop_count(upper);
            let lower = self.gcc_int_cast(value, self.cx.ulonglong_type);
            let lower_count = self.pop_count(lower);
            let total = upper_count + lower_count;
            return self.gcc_int_cast(total, result_type);
        }

        // Use Wenger's algorithm for population count, gcc's seems to play better with it
        // for (int counter = 0; value != 0; counter++) {
        //     value &= value - 1;
        // }
        let func = self.current_func();
        let head = func.new_block("head");
        let body = func.new_block("body");
        let tail = func.new_block("tail");

        let counter_type = self.int_type;
        let counter = func.new_local(None, counter_type, "popcount_counter");
        let remaining = func.new_local(None, unsigned_type, "popcount_value");

        // Loop initialisation: counter = 0, remaining = value.
        let counter_start = self.gcc_zero(counter_type);
        self.llbb().add_assignment(self.location, counter, counter_start);
        self.llbb().add_assignment(self.location, remaining, value);
        self.br(head);

        // Loop condition: keep going while some bit is still set.
        self.switch_to_block(head);
        let no_bits = self.gcc_zero(unsigned_type);
        let has_bits = self.gcc_icmp(IntPredicate::IntNE, remaining.to_rvalue(), no_bits);
        self.cond_br(has_bits, body, tail);

        // Loop body: remaining &= remaining - 1 clears the lowest set bit...
        self.switch_to_block(body);
        let value_one = self.gcc_int(unsigned_type, 1);
        let minus_one = self.gcc_sub(remaining.to_rvalue(), value_one);
        let cleared = self.gcc_and(remaining.to_rvalue(), minus_one);
        body.add_assignment(self.location, remaining, cleared);

        // ... and counter += 1 records it.
        let counter_one = self.gcc_int(counter_type, 1);
        let incremented = self.gcc_add(counter.to_rvalue(), counter_one);
        body.add_assignment(self.location, counter, incremented);
        self.br(head);

        // After the loop the counter holds the number of bits that were set.
        self.switch_to_block(tail);
        self.gcc_int_cast(counter.to_rvalue(), result_type)
    }

    /// Emits code rotating `value` left by `shift` bits, for a `width`-bit value.
    ///
    /// The shift amount is first reduced modulo `width`; the result is
    /// `(value << s) | (value >> (-s & (width - 1)))`.
    ///
    /// Algorithm from: https://blog.regehr.org/archives/1063
    fn rotate_left(
        &mut self,
        value: RValue<'gcc>,
        shift: RValue<'gcc>,
        width: u64,
    ) -> RValue<'gcc> {
        let modulus = self.const_uint(shift.get_type(), width);
        let amount = self.urem(shift, modulus);
        let high_part = self.shl(value, amount);

        // Masking the negated amount yields the complementary shift.
        let negated = self.neg(amount);
        let amount_mask = self.const_uint(amount.get_type(), width - 1);
        let complement = self.and(negated, amount_mask);
        let low_part = self.lshr(value, complement);

        self.or(high_part, low_part)
    }

    /// Emits code rotating `value` right by `shift` bits, for a `width`-bit value.
    ///
    /// The shift amount is first reduced modulo `width`; the result is
    /// `(value >> s) | (value << (-s & (width - 1)))`.
    ///
    /// Algorithm from: https://blog.regehr.org/archives/1063
    fn rotate_right(
        &mut self,
        value: RValue<'gcc>,
        shift: RValue<'gcc>,
        width: u64,
    ) -> RValue<'gcc> {
        let modulus = self.const_uint(shift.get_type(), width);
        let amount = self.urem(shift, modulus);
        let low_part = self.lshr(value, amount);

        // Masking the negated amount yields the complementary shift.
        let negated = self.neg(amount);
        let amount_mask = self.const_uint(amount.get_type(), width - 1);
        let complement = self.and(negated, amount_mask);
        let high_part = self.shl(value, complement);

        self.or(low_part, high_part)
    }

    /// Emits code computing `lhs + rhs`, saturating instead of wrapping on overflow.
    ///
    /// `signed` selects the signed or unsigned variant and `width` is the bit width of the
    /// operands. The result has the type of `lhs`. The signed variant introduces new basic
    /// blocks and leaves the builder positioned in the block following the branch.
    fn saturating_add(
        &mut self,
        lhs: RValue<'gcc>,
        rhs: RValue<'gcc>,
        signed: bool,
        width: u64,
    ) -> RValue<'gcc> {
        let result_type = lhs.get_type();

        if !signed {
            // Algorithm from: http://locklessinc.com/articles/sat_arithmetic/
            // A wrapped sum is smaller than `lhs`; negating that 0/1 flag gives an
            // all-zeroes or all-ones mask which is OR-ed into the sum.
            let sum = self.gcc_add(lhs, rhs);
            let wrapped = self.gcc_icmp(IntPredicate::IntULT, sum, lhs);
            let wrapped = self.gcc_int_cast(wrapped, result_type);
            let saturation_mask = self.gcc_neg(wrapped);
            return self.gcc_or(sum, saturation_mask, self.location);
        }

        // Based on algorithm from: https://stackoverflow.com/a/56531252/389119
        let func = self.current_func();
        let res = func.new_local(self.location, result_type, "saturating_sum");
        let overflow = if self.is_native_int_type(result_type) {
            let builtin = self.context.get_builtin_function("__builtin_add_overflow");
            let res_address = res.get_address(self.location);
            self.overflow_call(builtin, &[lhs, rhs, res_address], None)
        } else {
            let func_name = match width {
                128 => "__rust_i128_addo",
                _ => unreachable!(),
            };
            let (sum, overflow) = self.operation_with_overflow(func_name, lhs, rhs, width);
            self.llbb().add_assignment(self.location, res, sum);
            overflow
        };

        let then_block = func.new_block("then");
        let after_block = func.new_block("after");

        // Return `result_type`'s maximum or minimum value on overflow
        // NOTE: convert the type to unsigned to have an unsigned shift.
        let unsigned_type = result_type.to_unsigned(self.cx);
        // Sign bit of `lhs`: 0 for a non-negative value, 1 for a negative one.
        let unsigned_lhs = self.gcc_int_cast(lhs, unsigned_type);
        let sign_shift = self.gcc_int(unsigned_type, width as i64 - 1);
        let sign_bit = self.gcc_lshr(unsigned_lhs, sign_shift);
        // All ones shifted right by one: the bit pattern of the signed maximum.
        let unsigned_zero = self.gcc_int(unsigned_type, 0);
        let uint_max = self.gcc_not(unsigned_zero);
        let unsigned_one = self.gcc_int(unsigned_type, 1);
        let int_max = self.gcc_lshr(uint_max, unsigned_one);
        // Maximum plus the sign bit: the maximum, or one past it (the minimum's pattern).
        let bound = self.gcc_add(sign_bit, int_max);
        let bound = self.gcc_int_cast(bound, result_type);
        then_block.add_assignment(self.location, res, bound);
        then_block.end_with_jump(self.location, after_block);

        self.llbb().end_with_conditional(self.location, overflow, then_block, after_block);

        // NOTE: since jumps were added in a place rustc does not
        // expect, the current block in the state need to be updated.
        self.switch_to_block(after_block);

        res.to_rvalue()
    }

    /// Emits code computing `lhs - rhs`, saturating instead of wrapping on overflow.
    ///
    /// `signed` selects the signed or unsigned variant and `width` is the bit width of the
    /// operands. The result has the type of `lhs`. The signed variant introduces new basic
    /// blocks and leaves the builder positioned in the block following the branch.
    fn saturating_sub(
        &mut self,
        lhs: RValue<'gcc>,
        rhs: RValue<'gcc>,
        signed: bool,
        width: u64,
    ) -> RValue<'gcc> {
        let result_type = lhs.get_type();

        if !signed {
            // Algorithm from: https://locklessinc.com/articles/sat_arithmetic/
            // A difference that did not wrap is no larger than `lhs`; negating that 0/1
            // flag gives an all-ones or all-zeroes mask which is AND-ed with the difference.
            let difference = self.gcc_sub(lhs, rhs);
            let in_range = self.gcc_icmp(IntPredicate::IntULE, difference, lhs);
            let in_range = self.gcc_int_cast(in_range, result_type);
            let keep_mask = self.gcc_neg(in_range);
            return self.gcc_and(difference, keep_mask);
        }

        // Based on algorithm from: https://stackoverflow.com/a/56531252/389119
        let func = self.current_func();
        let res = func.new_local(self.location, result_type, "saturating_diff");
        let overflow = if self.is_native_int_type(result_type) {
            let builtin = self.context.get_builtin_function("__builtin_sub_overflow");
            let res_address = res.get_address(self.location);
            self.overflow_call(builtin, &[lhs, rhs, res_address], None)
        } else {
            let func_name = match width {
                128 => "__rust_i128_subo",
                _ => unreachable!(),
            };
            let (difference, overflow) =
                self.operation_with_overflow(func_name, lhs, rhs, width);
            self.llbb().add_assignment(self.location, res, difference);
            overflow
        };

        let then_block = func.new_block("then");
        let after_block = func.new_block("after");

        // Return `result_type`'s maximum or minimum value on overflow
        // NOTE: convert the type to unsigned to have an unsigned shift.
        let unsigned_type = result_type.to_unsigned(self.cx);
        // Sign bit of `lhs`: 0 for a non-negative value, 1 for a negative one.
        let unsigned_lhs = self.gcc_int_cast(lhs, unsigned_type);
        let sign_shift = self.gcc_int(unsigned_type, width as i64 - 1);
        let sign_bit = self.gcc_lshr(unsigned_lhs, sign_shift);
        // All ones shifted right by one: the bit pattern of the signed maximum.
        let unsigned_zero = self.gcc_int(unsigned_type, 0);
        let uint_max = self.gcc_not(unsigned_zero);
        let unsigned_one = self.gcc_int(unsigned_type, 1);
        let int_max = self.gcc_lshr(uint_max, unsigned_one);
        // Maximum plus the sign bit: the maximum, or one past it (the minimum's pattern).
        let bound = self.gcc_add(sign_bit, int_max);
        let bound = self.gcc_int_cast(bound, result_type);
        then_block.add_assignment(self.location, res, bound);
        then_block.end_with_jump(self.location, after_block);

        self.llbb().end_with_conditional(self.location, overflow, then_block, after_block);

        // NOTE: since jumps were added in a place rustc does not
        // expect, the current block in the state need to be updated.
        self.switch_to_block(after_block);

        res.to_rvalue()
    }
}

/// Emits the code for the `try` intrinsic: calls `try_func(data)` and stores an `i32`
/// status into `dest`.
///
/// When the session's panic strategy does not unwind, `try_func` is called directly and 0
/// is stored. Otherwise the GNU-style shim is used when the `master` feature is enabled;
/// MSVC SEH targets and builds without that feature are unimplemented.
fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(
    bx: &'b mut Builder<'a, 'gcc, 'tcx>,
    try_func: RValue<'gcc>,
    data: RValue<'gcc>,
    _catch_func: RValue<'gcc>,
    dest: PlaceRef<'tcx, RValue<'gcc>>,
) {
    if bx.sess().panic_strategy().unwinds() {
        if wants_msvc_seh(bx.sess()) {
            unimplemented!();
        }
        #[cfg(feature = "master")]
        codegen_gnu_try(bx, try_func, data, _catch_func, dest);
        #[cfg(not(feature = "master"))]
        unimplemented!();
    } else {
        let void_type = bx.type_void();
        bx.call(void_type, None, None, try_func, &[data], None, None);
        // Return 0 unconditionally from the intrinsic call;
        // we can never unwind.
        let no_panic = bx.const_i32(0);
        OperandValue::Immediate(no_panic).store(bx, dest);
    }
}

// Definition of the standard `try` function for Rust using the GNU-like model
// of exceptions (e.g., the normal semantics of LLVM's `landingpad` and `invoke`
// instructions).
//
// This codegen is a little surprising because we always call a shim
// function instead of inlining the call to `invoke` manually here. This is done
// because in LLVM we're only allowed to have one personality per function
// definition. The call to the `try` intrinsic is being inlined into the
// function calling it, and that function may already have other personality
// functions in play. By calling a shim we're guaranteed that our shim will have
// the right personality function.
//
// Parameters:
// - `bx`: builder positioned where the call to the shim is emitted.
// - `try_func`, `data`, `catch_func`: forwarded, in this order, as the three
//   arguments of the shim.
// - `dest`: place receiving the value returned by the shim (0 on the normal
//   path, 1 on the catch path).
#[cfg(feature = "master")]
fn codegen_gnu_try<'gcc, 'tcx>(
    bx: &mut Builder<'_, 'gcc, 'tcx>,
    try_func: RValue<'gcc>,
    data: RValue<'gcc>,
    catch_func: RValue<'gcc>,
    dest: PlaceRef<'tcx, RValue<'gcc>>,
) {
    let cx: &CodegenCx<'gcc, '_> = bx.cx;
    // The closure fills in the body of the shim; `bx` inside it is a fresh builder for the
    // shim function and shadows the outer one.
    let (llty, func) = get_rust_try_fn(cx, &mut |mut bx| {
        // Codegens the shims described above:
        //
        //   bx:
        //      invoke %try_func(%data) normal %normal unwind %catch
        //
        //   normal:
        //      ret 0
        //
        //   catch:
        //      (%ptr, _) = landingpad
        //      call %catch_func(%data, %ptr)
        //      ret 1
        let then = bx.append_sibling_block("then");
        let catch = bx.append_sibling_block("catch");

        // Inside the shim, the three values are the shim's own parameters (these shadow
        // the outer `try_func`, `data` and `catch_func`).
        let func = bx.current_func();
        let try_func = func.get_param(0).to_rvalue();
        let data = func.get_param(1).to_rvalue();
        let catch_func = func.get_param(2).to_rvalue();
        let try_func_ty = bx.type_func(&[bx.type_i8p()], bx.type_void());

        // Remember the entry block: the invoke is emitted into it last (see below).
        let current_block = bx.block;

        // Normal path: `try_func` returned without unwinding.
        bx.switch_to_block(then);
        bx.ret(bx.const_i32(0));

        // Type indicator for the exception being thrown.
        //
        // The value is a pointer to the exception object
        // being thrown.
        bx.switch_to_block(catch);
        bx.set_personality_fn(bx.eh_personality());

        // Fetch the exception pointer and hand it to `catch_func` together with `data`.
        let eh_pointer_builtin = bx.cx.context.get_target_builtin_function("__builtin_eh_pointer");
        let zero = bx.cx.context.new_rvalue_zero(bx.int_type);
        let ptr = bx.cx.context.new_call(None, eh_pointer_builtin, &[zero]);
        let catch_ty = bx.type_func(&[bx.type_i8p(), bx.type_i8p()], bx.type_void());
        bx.call(catch_ty, None, None, catch_func, &[data, ptr], None, None);
        bx.ret(bx.const_i32(1));

        // NOTE: the blocks must be filled before adding the try/catch, otherwise gcc will not
        // generate a try/catch.
        // FIXME(antoyo): add a check in the libgccjit API to prevent this.
        bx.switch_to_block(current_block);
        bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None);
    });

    // NOTE(review): this reinterprets the `Function` handle as an `RValue` so that it can be
    // passed as the callee of `bx.call`. Soundness presumably relies on both gccjit wrapper
    // types sharing a representation and on the callee being handled as a function
    // downstream; confirm and record the invariant in a `SAFETY:` comment.
    let func = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) };

    // Note that no invoke is used here because by definition this function
    // can't panic (that's what it's catching).
    let ret = bx.call(llty, None, None, func, &[try_func, data, catch_func], None, None);
    OperandValue::Immediate(ret).store(bx, dest);
}

// Helper function used to get a handle to the `__rust_try` function used to
// catch exceptions.
//
// The function is generated on first use, with `codegen` filling in its body, and
// is then cached in `cx.rust_try_fn`; later calls return the cached pair (the
// shim's return type and the function itself) without invoking `codegen`.
#[cfg(feature = "master")]
fn get_rust_try_fn<'a, 'gcc, 'tcx>(
    cx: &'a CodegenCx<'gcc, 'tcx>,
    codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>),
) -> (Type<'gcc>, Function<'gcc>) {
    if let Some(cached) = cx.rust_try_fn.get() {
        return cached;
    }

    // Define the types up front for the signature of the rust_try function.
    let tcx = cx.tcx;
    let unit = tcx.types.unit;
    let i8_ptr = Ty::new_mut_ptr(tcx, tcx.types.i8);

    // `unsafe fn(*mut i8) -> ()`
    let try_sig = tcx.mk_fn_sig_rust_abi(iter::once(i8_ptr), unit, rustc_hir::Safety::Unsafe);
    let try_fn_ty = Ty::new_fn_ptr(tcx, ty::Binder::dummy(try_sig));

    // `unsafe fn(*mut i8, *mut i8) -> ()`
    let catch_sig = tcx.mk_fn_sig_rust_abi([i8_ptr, i8_ptr], unit, rustc_hir::Safety::Unsafe);
    let catch_fn_ty = Ty::new_fn_ptr(tcx, ty::Binder::dummy(catch_sig));

    // `unsafe fn(unsafe fn(*mut i8) -> (), *mut i8, unsafe fn(*mut i8, *mut i8) -> ()) -> i32`
    let shim_sig = ty::Binder::dummy(tcx.mk_fn_sig_rust_abi(
        [try_fn_ty, i8_ptr, catch_fn_ty],
        tcx.types.i32,
        rustc_hir::Safety::Unsafe,
    ));

    let rust_try = gen_fn(cx, "__rust_try", shim_sig, codegen);
    cx.rust_try_fn.set(Some(rust_try));
    rust_try
}

// Helper function to give a Block to a closure to codegen a shim function.
// This is currently primarily used for the `try` intrinsic functions above.
//
// Declares a function called `name` with the signature `rust_fn_sig` (linkage is
// switched to internal just before the declaration), appends an entry block to it
// and passes a builder positioned on that block to `codegen`. Returns the GCC
// return type of the function together with the function itself.
#[cfg(feature = "master")]
fn gen_fn<'a, 'gcc, 'tcx>(
    cx: &'a CodegenCx<'gcc, 'tcx>,
    name: &str,
    rust_fn_sig: ty::PolyFnSig<'tcx>,
    codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>),
) -> (Type<'gcc>, Function<'gcc>) {
    let abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
    let shim_return_type = abi.gcc_type(cx).return_type;

    // FIXME(eddyb) find a nicer way to do this.
    cx.linkage.set(FunctionType::Internal);
    let shim = cx.declare_fn(name, abi);
    cx.set_frame_pointer_type(shim);
    cx.apply_target_cpu_attr(shim);

    let entry = Builder::append_block(cx, shim, "entry-block");
    codegen(Builder::build(cx, entry));

    (shim_return_type, shim)
}