mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-26 13:01:27 +03:00
Auto merge of #155343 - dianqk:indirect-by-ref, r=nikic
codegen: Copy to an alloca when the argument is neither by-val nor by-move for an indirect pointer. Fixes https://github.com/rust-lang/rust/issues/155241. When a value is passed via an indirect pointer, the value needs to be copied to a new alloca. For x86_64-unknown-linux-gnu, `Thing` is such a case: ```rust #[derive(Clone, Copy)] struct Thing(usize, usize, usize); pub fn foo() { let thing = Thing(0, 0, 0); bar(thing); assert_eq!(thing.0, 0); } #[inline(never)] #[unsafe(no_mangle)] pub fn bar(mut thing: Thing) { thing.0 = 1; } ``` Before passing the thing to the bar function, the thing needs to be copied to an alloca that is passed to bar. ```llvm %0 = alloca [24 x i8], align 8 call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %thing, i64 24, i1 false) call void @bar(ptr %0) ``` This patch applies the rule to the untupled arguments as well. ```rust #![feature(fn_traits)] #[derive(Clone, Copy)] struct Thing(usize, usize, usize); #[inline(never)] #[unsafe(no_mangle)] pub fn foo() { let thing = (Thing(0, 0, 0),); (|mut thing: Thing| { thing.0 = 1; }).call(thing); assert_eq!(thing.0.0, 0); } ``` For this case, this patch changes the codegen from ```llvm ; call example::foo::{closure#0} call void @_RNCNvCs15qdZVLwHPA_7example3foo0B3_(ptr ..., ptr %thing) ``` to ```llvm %0 = alloca [24 x i8], align 8 call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %thing, i64 24, i1 false) ; call example::foo::{closure#0} call void @_RNCNvCs15qdZVLwHPA_7example3foo0B3_(ptr ..., ptr %0) ``` However, the same rule cannot be applied to tail calls, as that would be unsound, because the caller's stack frame is overwritten by the callee's stack frame. Fortunately, https://github.com/rust-lang/rust/pull/151143 has already handled that special case; we must not copy again. No copy is needed for by-move arguments, because the argument is passed to the callee "in-place". 
Likewise, no copy is needed for by-val arguments, because the attribute implies that a hidden copy of the pointee is made between the caller and the callee. NOTE: The patch has a trick for tail calls, which we pass by-move. We could choose to copy to an alloca even for by-move arguments, but tail calls strictly require by-move.
This commit is contained in:
@@ -1147,8 +1147,9 @@ fn codegen_call_terminator(
|
||||
|
||||
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
|
||||
//
|
||||
// Normally an indirect argument with `on_stack: false` would be passed as a pointer into
|
||||
// the caller's stack frame. For tail calls, that would be unsound, because the caller's
|
||||
// Normally an indirect argument that is allocated in the caller's stack frame
|
||||
// would be passed as a pointer into the callee's stack frame.
|
||||
// For tail calls, that would be unsound, because the caller's
|
||||
// stack frame is overwritten by the callee's stack frame.
|
||||
//
|
||||
// Therefore we store the argument for the callee in the corresponding caller's slot.
|
||||
@@ -1240,59 +1241,57 @@ fn codegen_call_terminator(
|
||||
}
|
||||
}
|
||||
|
||||
match kind {
|
||||
CallKind::Normal => {
|
||||
// The callee needs to own the argument memory if we pass it
|
||||
// by-ref, so make a local copy of non-immediate constants.
|
||||
if let &mir::Operand::Copy(_) | &mir::Operand::Constant(_) = &arg.node
|
||||
&& let Ref(PlaceValue { llextra: None, .. }) = op.val
|
||||
{
|
||||
let tmp = PlaceRef::alloca(bx, op.layout);
|
||||
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
|
||||
op.store_with_annotation(bx, tmp);
|
||||
op.val = Ref(tmp.val);
|
||||
lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size));
|
||||
let by_move = if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode
|
||||
&& kind == CallKind::Tail
|
||||
{
|
||||
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
|
||||
//
|
||||
// Normally an indirect argument that is allocated in the caller's stack frame
|
||||
// would be passed as a pointer into the callee's stack frame.
|
||||
// For tail calls, that would be unsound, because the caller's
|
||||
// stack frame is overwritten by the callee's stack frame.
|
||||
//
|
||||
// To handle the case, we introduce `tail_call_temporaries` to copy arguments into
|
||||
// temporaries, then copy back to the caller's argument slots.
|
||||
// Finally, we pass the caller's argument slots as arguments.
|
||||
//
|
||||
// To do that, the argument must be MUST-by-move value.
|
||||
let Some(tmp) = tail_call_temporaries[i].take() else {
|
||||
span_bug!(fn_span, "missing temporary for indirect tail call argument #{i}")
|
||||
};
|
||||
|
||||
let local = self.mir.args_iter().nth(i).unwrap();
|
||||
|
||||
match &self.locals[local] {
|
||||
LocalRef::Place(arg) => {
|
||||
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
|
||||
op.val = Ref(arg.val);
|
||||
}
|
||||
}
|
||||
CallKind::Tail => {
|
||||
if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode {
|
||||
let Some(tmp) = tail_call_temporaries[i].take() else {
|
||||
span_bug!(
|
||||
fn_span,
|
||||
"missing temporary for indirect tail call argument #{i}"
|
||||
)
|
||||
LocalRef::Operand(arg) => {
|
||||
let Ref(place_value) = arg.val else {
|
||||
bug!("only `Ref` should use `PassMode::Indirect`");
|
||||
};
|
||||
|
||||
let local = self.mir.args_iter().nth(i).unwrap();
|
||||
|
||||
match &self.locals[local] {
|
||||
LocalRef::Place(arg) => {
|
||||
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
|
||||
op.val = Ref(arg.val);
|
||||
}
|
||||
LocalRef::Operand(arg) => {
|
||||
let Ref(place_value) = arg.val else {
|
||||
bug!("only `Ref` should use `PassMode::Indirect`");
|
||||
};
|
||||
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
|
||||
op.val = arg.val;
|
||||
}
|
||||
LocalRef::UnsizedPlace(_) => {
|
||||
span_bug!(fn_span, "unsized types are not supported")
|
||||
}
|
||||
LocalRef::PendingOperand => {
|
||||
span_bug!(fn_span, "argument local should not be pending")
|
||||
}
|
||||
};
|
||||
|
||||
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
|
||||
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
|
||||
op.val = arg.val;
|
||||
}
|
||||
}
|
||||
}
|
||||
LocalRef::UnsizedPlace(_) => {
|
||||
span_bug!(fn_span, "unsized types are not supported")
|
||||
}
|
||||
LocalRef::PendingOperand => {
|
||||
span_bug!(fn_span, "argument local should not be pending")
|
||||
}
|
||||
};
|
||||
|
||||
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
|
||||
true
|
||||
} else {
|
||||
matches!(arg.node, mir::Operand::Move(_))
|
||||
};
|
||||
|
||||
self.codegen_argument(
|
||||
bx,
|
||||
op,
|
||||
by_move,
|
||||
&mut llargs,
|
||||
&fn_abi.args[i],
|
||||
&mut lifetime_ends_after_call,
|
||||
@@ -1331,6 +1330,7 @@ fn codegen_call_terminator(
|
||||
self.codegen_argument(
|
||||
bx,
|
||||
location,
|
||||
/* by_move */ false,
|
||||
&mut llargs,
|
||||
last_arg,
|
||||
&mut lifetime_ends_after_call,
|
||||
@@ -1649,6 +1649,7 @@ fn codegen_argument(
|
||||
&mut self,
|
||||
bx: &mut Bx,
|
||||
op: OperandRef<'tcx, Bx::Value>,
|
||||
by_move: bool,
|
||||
llargs: &mut Vec<Bx::Value>,
|
||||
arg: &ArgAbi<'tcx, Ty<'tcx>>,
|
||||
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
|
||||
@@ -1703,18 +1704,19 @@ fn codegen_argument(
|
||||
_ => (op.immediate_or_packed_pair(bx), arg.layout.align.abi, false),
|
||||
},
|
||||
Ref(op_place_val) => match arg.mode {
|
||||
PassMode::Indirect { attrs, .. } => {
|
||||
PassMode::Indirect { attrs, on_stack, .. } => {
|
||||
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
|
||||
// alignment requirements may be higher than the type's alignment, so copy
|
||||
// to a higher-aligned alloca.
|
||||
let required_align = match attrs.pointee_align {
|
||||
Some(pointee_align) => cmp::max(pointee_align, arg.layout.align.abi),
|
||||
None => arg.layout.align.abi,
|
||||
};
|
||||
if op_place_val.align < required_align {
|
||||
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
|
||||
// alignment requirements may be higher than the type's alignment, so copy
|
||||
// to a higher-aligned alloca.
|
||||
// Copy to an alloca when the argument is neither by-val nor by-move.
|
||||
if op_place_val.align < required_align || (!on_stack && !by_move) {
|
||||
let scratch = PlaceValue::alloca(bx, arg.layout.size, required_align);
|
||||
bx.lifetime_start(scratch.llval, arg.layout.size);
|
||||
bx.typed_place_copy(scratch, op_place_val, op.layout);
|
||||
op.store_with_annotation(bx, scratch.with_type(arg.layout));
|
||||
lifetime_ends_after_call.push((scratch.llval, arg.layout.size));
|
||||
(scratch.llval, scratch.align, true)
|
||||
} else {
|
||||
@@ -1800,6 +1802,7 @@ fn codegen_arguments_untupled(
|
||||
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
|
||||
) -> usize {
|
||||
let tuple = self.codegen_operand(bx, operand);
|
||||
let by_move = matches!(operand, mir::Operand::Move(_));
|
||||
|
||||
// Handle both by-ref and immediate tuples.
|
||||
if let Ref(place_val) = tuple.val {
|
||||
@@ -1810,13 +1813,20 @@ fn codegen_arguments_untupled(
|
||||
for i in 0..tuple.layout.fields.count() {
|
||||
let field_ptr = tuple_ptr.project_field(bx, i);
|
||||
let field = bx.load_operand(field_ptr);
|
||||
self.codegen_argument(bx, field, llargs, &args[i], lifetime_ends_after_call);
|
||||
self.codegen_argument(
|
||||
bx,
|
||||
field,
|
||||
by_move,
|
||||
llargs,
|
||||
&args[i],
|
||||
lifetime_ends_after_call,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// If the tuple is immediate, the elements are as well.
|
||||
for i in 0..tuple.layout.fields.count() {
|
||||
let op = tuple.extract_field(self, bx, i);
|
||||
self.codegen_argument(bx, op, llargs, &args[i], lifetime_ends_after_call);
|
||||
self.codegen_argument(bx, op, by_move, llargs, &args[i], lifetime_ends_after_call);
|
||||
}
|
||||
}
|
||||
tuple.layout.fields.count()
|
||||
|
||||
@@ -11,19 +11,12 @@
|
||||
use minicore::*;
|
||||
|
||||
// Const operand. Regression test for #98156.
|
||||
// Temporary allocas are not required when passing as byval arguments.
|
||||
//
|
||||
// CHECK-LABEL: define void @const_indirect(
|
||||
// CHECK-NEXT: start:
|
||||
// CHECK-NEXT: [[B:%.*]] = alloca
|
||||
// CHECK-NEXT: [[A:%.*]] = alloca
|
||||
// CHECK-NEXT: call void @llvm.lifetime.start.p0({{(i64 4096, )?}}ptr [[A]])
|
||||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4096, i1 false)
|
||||
// CHECK-NEXT: call void %h(ptr {{.*}} [[A]])
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p0({{(i64 4096, )?}}ptr [[A]])
|
||||
// CHECK-NEXT: call void @llvm.lifetime.start.p0({{(i64 4096, )?}}ptr [[B]])
|
||||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4096, i1 false)
|
||||
// CHECK-NEXT: call void %h(ptr {{.*}} [[B]])
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p0({{(i64 4096, )?}}ptr [[B]])
|
||||
// CHECK-NEXT: call void %h(ptr {{.*}}byval([4096 x i8]){{.*}} [[C:@anon.*]])
|
||||
// CHECK-NEXT: call void %h(ptr {{.*}}byval([4096 x i8]){{.*}} [[C]])
|
||||
#[no_mangle]
|
||||
pub fn const_indirect(h: extern "C" fn([u32; 1024])) {
|
||||
const C: [u32; 1024] = [0; 1024];
|
||||
|
||||
@@ -74,7 +74,8 @@ pub fn do_call() {
|
||||
// CHECK: call void @test_simd(<4 x i32> <i32 2, i32 4, i32 6, i32 8>
|
||||
test_simd(const { Simd::<i32, 4>([2, 4, 6, 8]) });
|
||||
|
||||
// CHECK: call void @test_simd_unaligned(%"minisimd::PackedSimd<i32, 3>" %1
|
||||
// CHECK: [[UNALIGNED_ARG:%.*]] = load %"minisimd::PackedSimd<i32, 3>", ptr @anon{{.*}}
|
||||
// CHECK-NEXT: call void @test_simd_unaligned(%"minisimd::PackedSimd<i32, 3>" [[UNALIGNED_ARG]]
|
||||
test_simd_unaligned(const { Simd::<i32, 3>([2, 4, 6]) });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
//! Regression test for issue <https://github.com/rust-lang/rust/issues/155241>.
|
||||
//! Arguments passed indirectly via a hidden pointer must be copied to an alloca,
|
||||
//! except for by-val or by-move.
|
||||
//@ add-minicore
|
||||
//@ revisions: x64-linux i686-linux i686-windows
|
||||
//@ compile-flags: -Cno-prepopulate-passes -Copt-level=3
|
||||
//@[x64-linux] compile-flags: --target x86_64-unknown-linux-gnu
|
||||
//@[x64-linux] needs-llvm-components: x86
|
||||
//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
|
||||
//@[i686-linux] needs-llvm-components: x86
|
||||
//@[i686-windows] compile-flags: --target i686-pc-windows-msvc
|
||||
//@[i686-windows] needs-llvm-components: x86
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![feature(stmt_expr_attributes, no_core)]
|
||||
#![expect(unused)]
|
||||
#![no_std]
|
||||
#![no_core]
|
||||
|
||||
extern crate minicore;
|
||||
use minicore::*;
|
||||
|
||||
struct Thing(u64, u64, u64);
|
||||
|
||||
impl Copy for Thing {}
|
||||
|
||||
// The argument of the second call is a by-move argument.
|
||||
|
||||
// CHECK-LABEL: @normal
|
||||
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[normal_V1:%.*]], ptr{{.*}} %value,
|
||||
// CHECK: call void @opaque(ptr{{.*}} [[normal_V1]])
|
||||
// CHECK: call void @opaque(ptr{{.*}} %value)
|
||||
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[normal_V3:%.*]], ptr{{.*}} @anon{{.*}},
|
||||
// CHECK: call void @opaque(ptr{{.*}} [[normal_V3]])
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn normal() {
|
||||
#[inline(never)]
|
||||
#[unsafe(no_mangle)]
|
||||
fn opaque(mut thing: Thing) {
|
||||
thing.0 = 1;
|
||||
}
|
||||
let value = Thing(0, 0, 0);
|
||||
opaque(value);
|
||||
opaque(value);
|
||||
const VALUE: Thing = Thing(0, 0, 0);
|
||||
opaque(VALUE);
|
||||
}
|
||||
|
||||
// The argument of the second call is a by-move argument.
|
||||
|
||||
// CHECK-LABEL: @untupled
|
||||
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[untupled_V1:%.*]], ptr{{.*}} %value
|
||||
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#0}
|
||||
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} [[untupled_V1]])
|
||||
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#1}
|
||||
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} %value)
|
||||
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[untupled_V3:%.*]], ptr{{.*}} @anon{{.*}}
|
||||
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#2}
|
||||
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} [[untupled_V3]])
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn untupled() {
|
||||
let value = (Thing(0, 0, 0),);
|
||||
(#[inline(never)]
|
||||
|mut thing: Thing| {
|
||||
thing.0 = 1;
|
||||
})
|
||||
.call(value);
|
||||
(#[inline(never)]
|
||||
|mut thing: Thing| {
|
||||
thing.0 = 2;
|
||||
})
|
||||
.call(value);
|
||||
const VALUE: (Thing,) = (Thing(0, 0, 0),);
|
||||
(#[inline(never)]
|
||||
|mut thing: Thing| {
|
||||
thing.0 = 3;
|
||||
})
|
||||
.call(VALUE);
|
||||
}
|
||||
|
||||
// All memcpy calls are redundant for byval.
|
||||
|
||||
// CHECK-LABEL: @byval
|
||||
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} %value)
|
||||
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} %value)
|
||||
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} @anon{{.*}})
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn byval() {
|
||||
#[repr(C)]
|
||||
struct Thing(u64, u64, u64);
|
||||
impl Copy for Thing {}
|
||||
#[inline(never)]
|
||||
#[unsafe(no_mangle)]
|
||||
extern "C" fn opaque_byval(mut thing: Thing) {
|
||||
thing.0 = 1;
|
||||
}
|
||||
let value = Thing(0, 0, 0);
|
||||
opaque_byval(value);
|
||||
opaque_byval(value);
|
||||
const VALUE: Thing = Thing(0, 0, 0);
|
||||
opaque_byval(VALUE);
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
//! Regression test for issue <https://github.com/rust-lang/rust/issues/155241>.
|
||||
//@ run-pass
|
||||
//@ revisions: noopt opt
|
||||
//@[noopt] compile-flags: -C opt-level=0
|
||||
//@[opt] compile-flags: -C opt-level=3
|
||||
|
||||
#![feature(fn_traits, stmt_expr_attributes)]
|
||||
#![expect(unused)]
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
struct Thing {
|
||||
x: usize,
|
||||
y: usize,
|
||||
z: usize,
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn opt_0() {
|
||||
let value = (Thing { x: 0, y: 0, z: 0 },);
|
||||
(|mut thing: Thing| {
|
||||
thing.z = 1;
|
||||
})
|
||||
.call(value);
|
||||
assert_eq!(value.0.z, 0);
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn opt_3() {
|
||||
fn with(f: impl FnOnce(Vec<usize>)) {
|
||||
f(Vec::new())
|
||||
}
|
||||
with(|mut v| v.resize(2, 1));
|
||||
with(|v| {
|
||||
if v.len() != 0 {
|
||||
unreachable!();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn const_() {
|
||||
const VALUE: (Thing,) = (Thing { x: 0, y: 0, z: 0 },);
|
||||
|
||||
(#[inline(never)]
|
||||
|mut thing: Thing| {
|
||||
thing.z = 1;
|
||||
std::hint::black_box(&mut thing.z);
|
||||
assert_eq!(thing.z, 1);
|
||||
})
|
||||
.call(VALUE);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
opt_0();
|
||||
opt_3();
|
||||
const_();
|
||||
}
|
||||
Reference in New Issue
Block a user