Rollup merge of #156941 - ZuseZ4:update-offload-file-naming, r=oli-obk

Offload: Update confusing and outdated file name

We swapped the order in which we do host and device compilations. The file being renamed is effectively the output of the clang-offload-packager; its first magic bytes (`00000000  10 ff 10 ad`) confirm it. I was just creating a figure to visualize this and noticed that the name is off.

cc @Kevinsala @jdoerfert is there any naming convention?

r? oli-obk
This commit is contained in:
Jonathan Brouwer
2026-05-28 10:36:45 +02:00
committed by GitHub
4 changed files with 22 additions and 20 deletions
@@ -805,10 +805,10 @@ fn handle_offload<'ll>(cx: &'ll SimpleCx<'_>, old_fn: &llvm::Value) {
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
let device_path = cgcx.output_filenames.path(OutputType::Object);
let device_dir = device_path.parent().unwrap();
let device_out = device_dir.join("host.out");
let device_out = device_dir.join("device.bin");
let device_out_c = path_to_c_string(device_out.as_path());
unsafe {
// 1) Bundle device module into offload image host.out (device TM)
// 1) Bundle device module into offload image device.bin (device TM)
let ok = llvm::LLVMRustBundleImages(
module.module_llvm.llmod(),
module.module_llvm.tm.raw(),
@@ -821,7 +821,7 @@ fn handle_offload<'ll>(cx: &'ll SimpleCx<'_>, old_fn: &llvm::Value) {
}
// This assumes that we previously compiled our kernels for a gpu target, which created a
// `host.out` artifact. The user is supposed to provide us with a path to this artifact, we
// `device.bin` artifact. The user is supposed to provide us with a path to this artifact, we
// don't need any other artifacts from the previous run. We will embed this artifact into our
// LLVM-IR host module, to create a `host.o` ObjectFile, which we will write to disk.
// The last, not yet automated step uses the `clang-linker-wrapper` to process `host.o`.
@@ -837,7 +837,7 @@ fn handle_offload<'ll>(cx: &'ll SimpleCx<'_>, old_fn: &llvm::Value) {
} else if device_pathbuf
.file_name()
.and_then(|n| n.to_str())
.is_some_and(|n| n != "host.out")
.is_some_and(|n| n != "device.bin")
{
dcx.emit_err(crate::errors::OffloadWrongFileName);
} else if !device_pathbuf.exists() {
@@ -846,14 +846,14 @@ fn handle_offload<'ll>(cx: &'ll SimpleCx<'_>, old_fn: &llvm::Value) {
let host_path = cgcx.output_filenames.path(OutputType::Object);
let host_dir = host_path.parent().unwrap();
let out_obj = host_dir.join("host.o");
let host_out_c = path_to_c_string(device_pathbuf.as_path());
let device_bin_c = path_to_c_string(device_pathbuf.as_path());
// 2) Finalize host: lib.bc + host.out -> host.o (host TM)
// 2) Finalize host: lib.bc + device.bin -> host.o (host TM)
// We create a full clone of our LLVM host module, since we will embed the device IR
// into it, and this might break caching or incremental compilation otherwise.
let llmod2 = llvm::LLVMCloneModule(module.module_llvm.llmod());
let ok =
unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, host_out_c.as_ptr()) };
unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, device_bin_c.as_ptr()) };
if !ok {
dcx.emit_err(crate::errors::OffloadEmbedFailed);
}
@@ -868,7 +868,7 @@ fn handle_offload<'ll>(cx: &'ll SimpleCx<'_>, old_fn: &llvm::Value) {
prof,
true,
);
// We ignore cgcx.save_temps here and unconditionally always keep our `host.out` artifact.
// We ignore cgcx.save_temps here and unconditionally always keep our `device.bin` artifact.
// Otherwise, recompiling the host code would fail since we deleted that device artifact
// in the previous host compilation, which would be confusing at best.
}
+7 -5
View File
@@ -58,7 +58,9 @@ pub(crate) struct AutoDiffComponentMissing {
pub(crate) struct AutoDiffWithoutEnable;
#[derive(Diagnostic)]
#[diag("using the offload feature requires -Z offload=<Device or Host=/absolute/path/to/host.out>")]
#[diag(
"using the offload feature requires -Z offload=<Device or Host=/absolute/path/to/device.bin>"
)]
pub(crate) struct OffloadWithoutEnable;
#[derive(Diagnostic)]
@@ -66,23 +68,23 @@ pub(crate) struct AutoDiffComponentMissing {
pub(crate) struct OffloadWithoutFatLTO;
#[derive(Diagnostic)]
#[diag("using the `-Z offload=Host=/absolute/path/to/host.out` flag requires an absolute path")]
#[diag("using the `-Z offload=Host=/absolute/path/to/device.bin` flag requires an absolute path")]
pub(crate) struct OffloadWithoutAbsPath;
#[derive(Diagnostic)]
#[diag(
"using the `-Z offload=Host=/absolute/path/to/host.out` flag must point to a `host.out` file"
"using the `-Z offload=Host=/absolute/path/to/device.bin` flag must point to a `device.bin` file"
)]
pub(crate) struct OffloadWrongFileName;
#[derive(Diagnostic)]
#[diag(
"the given path/file to `host.out` does not exist. Did you forget to run the device compilation first?"
"the given path/file to `device.bin` does not exist. Did you forget to run the device compilation first?"
)]
pub(crate) struct OffloadNonexistingPath;
#[derive(Diagnostic)]
#[diag("call to BundleImages failed, `host.out` was not created")]
#[diag("call to BundleImages failed, `device.bin` was not created")]
pub(crate) struct OffloadBundleImagesFailed;
#[derive(Diagnostic)]
+6 -6
View File
@@ -1693,15 +1693,15 @@ pub(crate) fn LLVMBuildCallBr<'a>(
mod Offload {
use super::*;
unsafe extern "C" {
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
/// Processes the module and writes it in an offload compatible way into a "device.bin" file.
pub(crate) fn LLVMRustBundleImages<'a>(
M: &'a Module,
TM: &'a TargetMachine,
host_out: *const c_char,
device_bin: *const c_char,
) -> bool;
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
_M: &'a Module,
_host_out: *const c_char,
_device_bin: *const c_char,
) -> bool;
pub(crate) fn LLVMRustOffloadMapper<'a>(
OldFn: &'a Value,
@@ -1717,19 +1717,19 @@ pub(crate) fn LLVMRustOffloadMapper<'a>(
#[cfg(not(feature = "llvm_offload"))]
mod Offload_fallback {
use super::*;
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
/// Processes the module and writes it in an offload compatible way into a "device.bin" file.
/// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI.
#[allow(unused_unsafe)]
pub(crate) unsafe fn LLVMRustBundleImages<'a>(
_M: &'a Module,
_TM: &'a TargetMachine,
_host_out: *const c_char,
_device_bin: *const c_char,
) -> bool {
unimplemented!("This rustc version was not built with LLVM Offload support!");
}
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
_M: &'a Module,
_host_out: *const c_char,
_device_bin: *const c_char,
) -> bool {
unimplemented!("This rustc version was not built with LLVM Offload support!");
}
@@ -178,7 +178,7 @@ static Error writeFile(StringRef Filename, StringRef Data) {
// This is the first of many steps in creating a binary using llvm offload,
// to run code on the gpu. Concretely, it replaces the following use of the binary:
// clang-offload-packager -o host.out
// clang-offload-packager -o device.bin
// --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp
// The input module is the rust code compiled for a gpu target like amdgpu.
// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp