Skip to content

Commit fa584fa

Browse files
committed
Update test and verify that tgt_(un)register_lib have the right type
1 parent 4c3310a commit fa584fa

5 files changed

Lines changed: 40 additions & 28 deletions

File tree

compiler/rustc_codegen_llvm/src/base.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,13 @@ pub(crate) fn compile_codegen_unit(
9393
// They are necessary for correct offload execution. We do this here to simplify the
9494
// `offload` intrinsic, avoiding the need for tracking whether it's the first
9595
// intrinsic call or not.
96-
let has_host_offload =
97-
cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_)));
96+
let has_host_offload = cx
97+
.sess()
98+
.opts
99+
.unstable_opts
100+
.offload
101+
.iter()
102+
.any(|o| matches!(o, Offload::Host(_) | Offload::Test));
98103
if has_host_offload && !cx.sess().target.is_like_gpu {
99104
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
100105
}

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ impl<'ll> OffloadGlobals<'ll> {
4949
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
5050
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);
5151

52-
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
53-
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
52+
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
53+
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
54+
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
5455
let init_ty = cx.type_func(&[], cx.type_void());
5556
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);
5657

compiler/rustc_session/src/config.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ pub enum Offload {
196196
Device,
197197
/// Second step in the offload pipeline, generates the host code to call kernels.
198198
Host(String),
199+
/// Test is similar to Host, but allows testing without a device artifact.
200+
Test,
199201
}
200202

201203
/// The different settings that the `-Z autodiff` flag can have.

compiler/rustc_session/src/options.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,8 @@ mod desc {
794794
pub(crate) const parse_list_with_polarity: &str =
795795
"a comma-separated list of strings, with elements beginning with + or -";
796796
pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`, `NoTT`";
797-
pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`";
797+
pub(crate) const parse_offload: &str =
798+
"a comma separated list of settings: `Host=<Absolute-Path>`, `Device`, `Test`";
798799
pub(crate) const parse_comma_list: &str = "a comma-separated list of strings";
799800
pub(crate) const parse_opt_comma_list: &str = parse_comma_list;
800801
pub(crate) const parse_number: &str = "a number";
@@ -1471,6 +1472,13 @@ pub mod parse {
14711472
}
14721473
Offload::Device
14731474
}
1475+
"Test" => {
1476+
if let Some(_) = arg {
1477+
// Test does not accept a value
1478+
return false;
1479+
}
1480+
Offload::Test
1481+
}
14741482
_ => {
14751483
// FIXME(ZuseZ4): print an error saying which value is not recognized
14761484
return false;

tests/codegen-llvm/gpu_offload/gpu_host.rs

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
1-
//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
1+
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
22
//@ no-prefer-dynamic
3-
//@ needs-enzyme
3+
//@ needs-offload
44

55
// This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the
66
// kernel_1. Better documentation of what each global or variable means is available in the gpu
7-
// offlaod code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
8-
// and will be rewritten once a proper offload frontend has landed.
9-
//
10-
// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
11-
// when inside of a function called main. This, too, is a temporary workaround for not having a
12-
// frontend.
7+
// offload code, or the LLVM offload documentation.
138

149
#![feature(rustc_attrs)]
1510
#![feature(core_intrinsics)]
@@ -22,6 +17,20 @@ fn main() {
2217
core::hint::black_box(&x);
2318
}
2419

20+
#[unsafe(no_mangle)]
21+
#[inline(never)]
22+
pub fn kernel_1(x: &mut [f32; 256]) {
23+
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,))
24+
}
25+
26+
#[unsafe(no_mangle)]
27+
#[inline(never)]
28+
pub fn _kernel_1(x: &mut [f32; 256]) {
29+
for i in 0..256 {
30+
x[i] = 21.0;
31+
}
32+
}
33+
2534
// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
2635
// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
2736
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -36,8 +45,9 @@ fn main() {
3645
// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
3746
// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
3847

39-
// CHECK: Function Attrs: nounwind
4048
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
49+
// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
50+
// CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr
4151

4252
// CHECK: define{{( dso_local)?}} void @main()
4353
// CHECK-NEXT: start:
@@ -94,17 +104,3 @@ fn main() {
94104
// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
95105
// CHECK-NEXT: ret void
96106
// CHECK-NEXT: }
97-
98-
#[unsafe(no_mangle)]
99-
#[inline(never)]
100-
pub fn kernel_1(x: &mut [f32; 256]) {
101-
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,))
102-
}
103-
104-
#[unsafe(no_mangle)]
105-
#[inline(never)]
106-
pub fn _kernel_1(x: &mut [f32; 256]) {
107-
for i in 0..256 {
108-
x[i] = 21.0;
109-
}
110-
}

0 commit comments

Comments
 (0)