Auto merge of #155343 - dianqk:indirect-by-ref, r=nikic

codegen: Copy to an alloca when the argument is neither by-val nor by-move for indirect pointer.



Fixes https://github.com/rust-lang/rust/issues/155241.

When a value is passed via an indirect pointer, the value needs to be copied to a new alloca. For x86_64-unknown-linux-gnu, `Thing` is such a case:

```rust
#[derive(Clone, Copy)]
struct Thing(usize, usize, usize);

pub fn foo() {
    let thing = Thing(0, 0, 0);
    bar(thing);
    assert_eq!(thing.0, 0);
}

#[inline(never)]
#[unsafe(no_mangle)]
pub fn bar(mut thing: Thing) {
    thing.0 = 1;
}
```

Before passing the thing to the bar function, the thing needs to be copied to an alloca that is passed to bar.

```llvm
%0 = alloca [24 x i8], align 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %thing, i64 24, i1 false)
call void @bar(ptr %0)
```

This patch applies the rule to the untupled arguments as well.

```rust
#![feature(fn_traits)]

#[derive(Clone, Copy)]
struct Thing(usize, usize, usize);

#[inline(never)]
#[unsafe(no_mangle)]
pub fn foo() {
    let thing = (Thing(0, 0, 0),);
    (|mut thing: Thing| {
        thing.0 = 1;
    }).call(thing);
    assert_eq!(thing.0.0, 0);
}
```

For this case, this patch changes from

```llvm
; call example::foo::{closure#0}
call void @_RNCNvCs15qdZVLwHPA_7example3foo0B3_(ptr ..., ptr %thing)
```

to

```llvm
%0 = alloca [24 x i8], align 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %thing, i64 24, i1 false)
; call example::foo::{closure#0}
call void @_RNCNvCs15qdZVLwHPA_7example3foo0B3_(ptr ..., ptr %0)
```

However, the same rule cannot be applied to tail calls, because that would be unsound: the caller's stack frame is overwritten by the callee's stack frame. Fortunately, https://github.com/rust-lang/rust/pull/151143 has already handled this special case. We must not copy again.

No copy is needed for by-move arguments, because the argument is passed to the callee "in-place".

No copy is also needed for by-val arguments, because the attribute implies that a hidden copy of the pointee is made between the caller and the callee.


NOTE: The patch has a trick for tail calls that we pass by-move. We could choose to copy to an alloca even for by-move arguments, but tail calls require the argument to genuinely be by-move.
This commit is contained in:
bors
2026-04-22 15:47:21 +00:00
5 changed files with 230 additions and 67 deletions
+66 -56
View File
@@ -1147,8 +1147,9 @@ fn codegen_call_terminator(
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
//
// Normally an indirect argument with `on_stack: false` would be passed as a pointer into
// the caller's stack frame. For tail calls, that would be unsound, because the caller's
// Normally an indirect argument that is allocated in the caller's stack frame
// would be passed as a pointer into the callee's stack frame.
// For tail calls, that would be unsound, because the caller's
// stack frame is overwritten by the callee's stack frame.
//
// Therefore we store the argument for the callee in the corresponding caller's slot.
@@ -1240,59 +1241,57 @@ fn codegen_call_terminator(
}
}
match kind {
CallKind::Normal => {
// The callee needs to own the argument memory if we pass it
// by-ref, so make a local copy of non-immediate constants.
if let &mir::Operand::Copy(_) | &mir::Operand::Constant(_) = &arg.node
&& let Ref(PlaceValue { llextra: None, .. }) = op.val
{
let tmp = PlaceRef::alloca(bx, op.layout);
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
op.store_with_annotation(bx, tmp);
op.val = Ref(tmp.val);
lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size));
let by_move = if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode
&& kind == CallKind::Tail
{
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
//
// Normally an indirect argument that is allocated in the caller's stack frame
// would be passed as a pointer into the callee's stack frame.
// For tail calls, that would be unsound, because the caller's
// stack frame is overwritten by the callee's stack frame.
//
// To handle the case, we introduce `tail_call_temporaries` to copy arguments into
// temporaries, then copy back to the caller's argument slots.
// Finally, we pass the caller's argument slots as arguments.
//
// To do that, the argument must be MUST-by-move value.
let Some(tmp) = tail_call_temporaries[i].take() else {
span_bug!(fn_span, "missing temporary for indirect tail call argument #{i}")
};
let local = self.mir.args_iter().nth(i).unwrap();
match &self.locals[local] {
LocalRef::Place(arg) => {
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
op.val = Ref(arg.val);
}
}
CallKind::Tail => {
if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode {
let Some(tmp) = tail_call_temporaries[i].take() else {
span_bug!(
fn_span,
"missing temporary for indirect tail call argument #{i}"
)
LocalRef::Operand(arg) => {
let Ref(place_value) = arg.val else {
bug!("only `Ref` should use `PassMode::Indirect`");
};
let local = self.mir.args_iter().nth(i).unwrap();
match &self.locals[local] {
LocalRef::Place(arg) => {
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
op.val = Ref(arg.val);
}
LocalRef::Operand(arg) => {
let Ref(place_value) = arg.val else {
bug!("only `Ref` should use `PassMode::Indirect`");
};
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
op.val = arg.val;
}
LocalRef::UnsizedPlace(_) => {
span_bug!(fn_span, "unsized types are not supported")
}
LocalRef::PendingOperand => {
span_bug!(fn_span, "argument local should not be pending")
}
};
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
op.val = arg.val;
}
}
}
LocalRef::UnsizedPlace(_) => {
span_bug!(fn_span, "unsized types are not supported")
}
LocalRef::PendingOperand => {
span_bug!(fn_span, "argument local should not be pending")
}
};
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
true
} else {
matches!(arg.node, mir::Operand::Move(_))
};
self.codegen_argument(
bx,
op,
by_move,
&mut llargs,
&fn_abi.args[i],
&mut lifetime_ends_after_call,
@@ -1331,6 +1330,7 @@ fn codegen_call_terminator(
self.codegen_argument(
bx,
location,
/* by_move */ false,
&mut llargs,
last_arg,
&mut lifetime_ends_after_call,
@@ -1649,6 +1649,7 @@ fn codegen_argument(
&mut self,
bx: &mut Bx,
op: OperandRef<'tcx, Bx::Value>,
by_move: bool,
llargs: &mut Vec<Bx::Value>,
arg: &ArgAbi<'tcx, Ty<'tcx>>,
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
@@ -1703,18 +1704,19 @@ fn codegen_argument(
_ => (op.immediate_or_packed_pair(bx), arg.layout.align.abi, false),
},
Ref(op_place_val) => match arg.mode {
PassMode::Indirect { attrs, .. } => {
PassMode::Indirect { attrs, on_stack, .. } => {
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
// alignment requirements may be higher than the type's alignment, so copy
// to a higher-aligned alloca.
let required_align = match attrs.pointee_align {
Some(pointee_align) => cmp::max(pointee_align, arg.layout.align.abi),
None => arg.layout.align.abi,
};
if op_place_val.align < required_align {
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
// alignment requirements may be higher than the type's alignment, so copy
// to a higher-aligned alloca.
// Copy to an alloca when the argument is neither by-val nor by-move.
if op_place_val.align < required_align || (!on_stack && !by_move) {
let scratch = PlaceValue::alloca(bx, arg.layout.size, required_align);
bx.lifetime_start(scratch.llval, arg.layout.size);
bx.typed_place_copy(scratch, op_place_val, op.layout);
op.store_with_annotation(bx, scratch.with_type(arg.layout));
lifetime_ends_after_call.push((scratch.llval, arg.layout.size));
(scratch.llval, scratch.align, true)
} else {
@@ -1800,6 +1802,7 @@ fn codegen_arguments_untupled(
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
) -> usize {
let tuple = self.codegen_operand(bx, operand);
let by_move = matches!(operand, mir::Operand::Move(_));
// Handle both by-ref and immediate tuples.
if let Ref(place_val) = tuple.val {
@@ -1810,13 +1813,20 @@ fn codegen_arguments_untupled(
for i in 0..tuple.layout.fields.count() {
let field_ptr = tuple_ptr.project_field(bx, i);
let field = bx.load_operand(field_ptr);
self.codegen_argument(bx, field, llargs, &args[i], lifetime_ends_after_call);
self.codegen_argument(
bx,
field,
by_move,
llargs,
&args[i],
lifetime_ends_after_call,
);
}
} else {
// If the tuple is immediate, the elements are as well.
for i in 0..tuple.layout.fields.count() {
let op = tuple.extract_field(self, bx, i);
self.codegen_argument(bx, op, llargs, &args[i], lifetime_ends_after_call);
self.codegen_argument(bx, op, by_move, llargs, &args[i], lifetime_ends_after_call);
}
}
tuple.layout.fields.count()
+3 -10
View File
@@ -11,19 +11,12 @@
use minicore::*;
// Const operand. Regression test for #98156.
// Temporary allocas are not required when passing as byval arguments.
//
// CHECK-LABEL: define void @const_indirect(
// CHECK-NEXT: start:
// CHECK-NEXT: [[B:%.*]] = alloca
// CHECK-NEXT: [[A:%.*]] = alloca
// CHECK-NEXT: call void @llvm.lifetime.start.p0({{(i64 4096, )?}}ptr [[A]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4096, i1 false)
// CHECK-NEXT: call void %h(ptr {{.*}} [[A]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0({{(i64 4096, )?}}ptr [[A]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0({{(i64 4096, )?}}ptr [[B]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4096, i1 false)
// CHECK-NEXT: call void %h(ptr {{.*}} [[B]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0({{(i64 4096, )?}}ptr [[B]])
// CHECK-NEXT: call void %h(ptr {{.*}}byval([4096 x i8]){{.*}} [[C:@anon.*]])
// CHECK-NEXT: call void %h(ptr {{.*}}byval([4096 x i8]){{.*}} [[C]])
#[no_mangle]
pub fn const_indirect(h: extern "C" fn([u32; 1024])) {
const C: [u32; 1024] = [0; 1024];
+2 -1
View File
@@ -74,7 +74,8 @@ pub fn do_call() {
// CHECK: call void @test_simd(<4 x i32> <i32 2, i32 4, i32 6, i32 8>
test_simd(const { Simd::<i32, 4>([2, 4, 6, 8]) });
// CHECK: call void @test_simd_unaligned(%"minisimd::PackedSimd<i32, 3>" %1
// CHECK: [[UNALIGNED_ARG:%.*]] = load %"minisimd::PackedSimd<i32, 3>", ptr @anon{{.*}}
// CHECK-NEXT: call void @test_simd_unaligned(%"minisimd::PackedSimd<i32, 3>" [[UNALIGNED_ARG]]
test_simd_unaligned(const { Simd::<i32, 3>([2, 4, 6]) });
}
}
@@ -0,0 +1,102 @@
//! Regression test for issue <https://github.com/rust-lang/rust/issues/155241>.
//! Arguments passed indirectly via a hidden pointer must be copied to an alloca,
//! except for by-val or by-move.
//@ add-minicore
//@ revisions: x64-linux i686-linux i686-windows
//@ compile-flags: -Cno-prepopulate-passes -Copt-level=3
//@[x64-linux] compile-flags: --target x86_64-unknown-linux-gnu
//@[x64-linux] needs-llvm-components: x86
//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
//@[i686-linux] needs-llvm-components: x86
//@[i686-windows] compile-flags: --target i686-pc-windows-msvc
//@[i686-windows] needs-llvm-components: x86
#![crate_type = "lib"]
#![feature(stmt_expr_attributes, no_core)]
#![expect(unused)]
#![no_std]
#![no_core]
extern crate minicore;
use minicore::*;
// 24-byte `Copy` struct — per the CHECK lines below it is passed via a
// hidden pointer (indirectly) on the tested x86 targets.
struct Thing(u64, u64, u64);
impl Copy for Thing {}
// The argument of the second call is a by-move argument.
// CHECK-LABEL: @normal
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[normal_V1:%.*]], ptr{{.*}} %value,
// CHECK: call void @opaque(ptr{{.*}} [[normal_V1]])
// CHECK: call void @opaque(ptr{{.*}} %value)
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[normal_V3:%.*]], ptr{{.*}} @anon{{.*}},
// CHECK: call void @opaque(ptr{{.*}} [[normal_V3]])
#[unsafe(no_mangle)]
pub fn normal() {
    // Opaque callee: `#[inline(never)]` keeps the call (and thus the
    // argument-passing ABI) visible in the emitted IR; it mutates only
    // its own copy of `thing`.
    #[inline(never)]
    #[unsafe(no_mangle)]
    fn opaque(mut thing: Thing) {
        thing.0 = 1;
    }
    let value = Thing(0, 0, 0);
    // Copy operand (`value` is used again below): a defensive
    // alloca + memcpy is required before the call.
    opaque(value);
    // Last use — a by-move operand, so `value` may be passed in place
    // with no extra copy (see the CHECK lines above).
    opaque(value);
    const VALUE: Thing = Thing(0, 0, 0);
    // Constant operand: must be copied out of the (read-only) constant
    // allocation into a mutable alloca.
    opaque(VALUE);
}
// The argument of the second call is a by-move argument.
// CHECK-LABEL: @untupled
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[untupled_V1:%.*]], ptr{{.*}} %value
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#0}
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} [[untupled_V1]])
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#1}
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} %value)
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[untupled_V3:%.*]], ptr{{.*}} @anon{{.*}}
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#2}
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} [[untupled_V3]])
#[unsafe(no_mangle)]
pub fn untupled() {
    // Calling closures through `Fn::call` exercises the "untupled
    // arguments" codegen path that this patch also fixes.
    let value = (Thing(0, 0, 0),);
    // Copy operand (`value` used again below): needs its own alloca.
    (#[inline(never)]
    |mut thing: Thing| {
        thing.0 = 1;
    })
    .call(value);
    // Last use — by-move operand, passed in place without a copy.
    (#[inline(never)]
    |mut thing: Thing| {
        thing.0 = 2;
    })
    .call(value);
    const VALUE: (Thing,) = (Thing(0, 0, 0),);
    // Constant operand: copied out of the constant allocation first.
    (#[inline(never)]
    |mut thing: Thing| {
        thing.0 = 3;
    })
    .call(VALUE);
}
// All memcpy calls are redundant for byval.
// CHECK-LABEL: @byval
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} %value)
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} %value)
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} @anon{{.*}})
#[unsafe(no_mangle)]
pub fn byval() {
    // Local `#[repr(C)]` shadow of `Thing` so the `extern "C"` ABI
    // passes it with the `byval` attribute on these targets.
    #[repr(C)]
    struct Thing(u64, u64, u64);
    impl Copy for Thing {}
    #[inline(never)]
    #[unsafe(no_mangle)]
    extern "C" fn opaque_byval(mut thing: Thing) {
        thing.0 = 1;
    }
    let value = Thing(0, 0, 0);
    // `byval` implies a hidden caller-side copy of the pointee, so no
    // explicit alloca/memcpy is needed for any of these calls
    // (see the CHECK lines above).
    opaque_byval(value);
    opaque_byval(value);
    const VALUE: Thing = Thing(0, 0, 0);
    opaque_byval(VALUE);
}
+57
View File
@@ -0,0 +1,57 @@
//! Regression test for issue <https://github.com/rust-lang/rust/issues/155241>.
//@ run-pass
//@ revisions: noopt opt
//@[noopt] compile-flags: -C opt-level=0
//@[opt] compile-flags: -C opt-level=3
#![feature(fn_traits, stmt_expr_attributes)]
#![expect(unused)]
// Three-`usize` `Copy` struct — sized so that it is passed indirectly
// (via a hidden pointer) on the targets where issue 155241 reproduced.
#[derive(Copy, Clone)]
struct Thing {
    x: usize,
    y: usize,
    z: usize,
}
#[inline(never)]
fn opt_0() {
    let value = (Thing { x: 0, y: 0, z: 0 },);
    // `Fn::call` untuples `value`; the closure mutates what should be
    // its own copy of the argument.
    (|mut thing: Thing| {
        thing.z = 1;
    })
    .call(value);
    // The caller's original must be untouched — the original bug let the
    // callee's write through the indirect pointer clobber it.
    assert_eq!(value.0.z, 0);
}
#[inline(never)]
fn opt_3() {
    // Helper that always hands the closure a fresh, empty `Vec`.
    fn with(f: impl FnOnce(Vec<usize>)) {
        f(Vec::new())
    }
    // The first closure grows its argument; that mutation must not be
    // observable through the second call's freshly-created `Vec`.
    with(|mut v| v.resize(2, 1));
    with(|v| {
        if v.len() != 0 {
            unreachable!();
        }
    });
}
#[inline(never)]
fn const_() {
    const VALUE: (Thing,) = (Thing { x: 0, y: 0, z: 0 },);
    // Constant operand: it must be copied into a mutable alloca before
    // the call, so the closure's write lands in writable memory and is
    // observed as 1 (black_box keeps the write from being folded away).
    (#[inline(never)]
    |mut thing: Thing| {
        thing.z = 1;
        std::hint::black_box(&mut thing.z);
        assert_eq!(thing.z, 1);
    })
    .call(VALUE);
}
fn main() {
opt_0();
opt_3();
const_();
}