Apply the convergent attribute to functions for GPU targets

On targets with convergent operations, we need to add the convergent
attribute to all functions that run convergent operations. Following
clang, we can conservatively apply the attribute to all functions when
compiling for such a target and rely on LLVM optimizing away the
attribute in cases where it is not necessary.

This affects the amdgpu and nvptx targets.
This commit is contained in:
Flakebi
2025-12-04 10:37:26 +01:00
parent 6bdc342ddb
commit 8e932ed79c
5 changed files with 51 additions and 1 deletions
@@ -136,6 +136,17 @@ fn create_wrapper_function(
None
};
if tcx.sess.target.is_like_gpu {
// Conservatively apply convergent to all functions in case they may call
// a convergent function. Rely on LLVM to optimize away the unnecessary
// convergent attributes.
attributes::apply_to_llfn(
llfn,
llvm::AttributePlace::Function,
&[llvm::AttributeKind::Convergent.create_attr(cx.llcx)],
);
}
let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) };
let mut bx = SBuilder::build(&cx, llbb);
+8 -1
View File
@@ -14,7 +14,7 @@
use std::borrow::Borrow;
use itertools::Itertools;
use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods;
use rustc_codegen_ssa::traits::{MiscCodegenMethods, TypeMembershipCodegenMethods};
use rustc_data_structures::fx::FxIndexSet;
use rustc_middle::ty::{Instance, Ty};
use rustc_sanitizers::{cfi, kcfi};
@@ -70,6 +70,13 @@ pub(crate) fn declare_raw_fn<'ll, 'tcx>(
let mut attrs = SmallVec::<[_; 4]>::new();
if cx.sess().target.is_like_gpu {
// Conservatively apply convergent to all functions in case they may call
// a convergent function. Rely on LLVM to optimize away the unnecessary
// convergent attributes.
attrs.push(llvm::AttributeKind::Convergent.create_attr(cx.llcx));
}
if cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) {
attrs.push(llvm::AttributeKind::NoRedZone.create_attr(cx.llcx));
}
@@ -293,6 +293,7 @@ pub(crate) enum AttributeKind {
CapturesNone = 46,
SanitizeRealtimeNonblocking = 47,
SanitizeRealtimeBlocking = 48,
Convergent = 49,
}
/// LLVMIntPredicate
@@ -361,6 +361,7 @@ enum class LLVMRustAttributeKind {
CapturesNone = 46,
SanitizeRealtimeNonblocking = 47,
SanitizeRealtimeBlocking = 48,
Convergent = 49,
};
static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) {
@@ -457,6 +458,8 @@ static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) {
return Attribute::SanitizeRealtime;
case LLVMRustAttributeKind::SanitizeRealtimeBlocking:
return Attribute::SanitizeRealtimeBlocking;
case LLVMRustAttributeKind::Convergent:
return Attribute::Convergent;
}
report_fatal_error("bad LLVMRustAttributeKind");
}
+28
View File
@@ -0,0 +1,28 @@
// Checks that when compiling for GPU targets, the convergent attribute
// is added to function declarations and definitions.
//@ add-minicore
//@ revisions: amdgpu nvptx
//@ [amdgpu] compile-flags: --crate-type=rlib --target=amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
//@ [amdgpu] needs-llvm-components: amdgpu
//@ [nvptx] compile-flags: --crate-type=rlib --target=nvptx64-nvidia-cuda
//@ [nvptx] needs-llvm-components: nvptx
#![feature(no_core, lang_items, abi_gpu_kernel)]
#![no_core]
extern crate minicore;
use minicore::*;
// Foreign function that is declared but not defined in this test crate.
// NOTE(review): it is expected to appear in the generated IR only as a
// `declare`, which is what the directive for `@ext` further down matches.
extern "C" {
fn ext();
}
// CHECK: define {{.*}}_kernel void @fun(i32{{.*}}) unnamed_addr #[[ATTR:[0-9]+]] {
// CHECK: declare void @ext() unnamed_addr #[[ATTR]]
// CHECK: attributes #[[ATTR]] = {{.*}} convergent
// Kernel entry point whose emitted definition is matched by the FileCheck
// directives directly above. `#[no_mangle]` keeps the symbol name as `fun`,
// which is the name the `define` pattern looks for.
#[no_mangle]
pub extern "gpu-kernel" fn fun(_: i32) {
// Calling `ext` references the foreign function from this crate; the
// directives above expect a `declare` for it alongside this definition,
// both sharing one attribute group that contains `convergent`.
// NOTE(review): no SAFETY argument is given for this call. Presumably that
// is acceptable because the test only inspects generated IR; confirm.
unsafe {
ext();
}
}