1- //@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
1+ //@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
22//@ no-prefer-dynamic
3- //@ needs-enzyme
3+ //@ needs-offload
44
55// This test verifies that we generate __tgt_target_data_*_mapper before and after a call to
66// kernel_1. Better documentation of what each global or variable means is available in the gpu
7- // offlaod code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
8- // and will be rewritten once a proper offload frontend has landed.
9- //
10- // We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
11- // when inside of a function called main. This, too, is a temporary workaround for not having a
12- // frontend.
7+ // offload code, or the LLVM offload documentation.
138
149#![ feature( rustc_attrs) ]
1510#![ feature( core_intrinsics) ]
@@ -22,6 +17,20 @@ fn main() {
2217 core:: hint:: black_box ( & x) ;
2318}
2419
// Host-side wrapper used by this test: its whole body is one call to the
// `core::intrinsics::offload` intrinsic, used as the tail expression.
// The call passes `_kernel_1`, the constant triples `[256, 1, 1]` and `[32, 1, 1]`,
// and the one-element tuple `(x,)` wrapping the caller's array reference.
// NOTE(review): the two triples are presumably the workgroup and thread dimensions of the
// launch -- confirm against the intrinsic's documentation.
// `no_mangle` fixes the symbol name to `kernel_1`; `inline(never)` asks the compiler not to
// inline this function into its callers.
20+ #[ unsafe( no_mangle) ]
21+ #[ inline( never) ]
22+ pub fn kernel_1 ( x : & mut [ f32 ; 256 ] ) {
23+ core:: intrinsics:: offload ( _kernel_1, [ 256 , 1 , 1 ] , [ 32 , 1 , 1 ] , ( x, ) )
24+ }
25+
/// Kernel body that `kernel_1` passes to `core::intrinsics::offload`.
///
/// Overwrites every one of the 256 elements of `x` with `21.0`; nothing is read from the
/// array and nothing is returned.
///
/// `no_mangle` fixes the symbol name to `_kernel_1`, which is the name the
/// `@.offloading.entry_name._kernel_1` / `@.offloading.entry._kernel_1` CHECK lines in this
/// test refer to. `inline(never)` asks the compiler not to inline it into callers.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn _kernel_1(x: &mut [f32; 256]) {
    // `fill` writes the whole array without an index variable, so there is no
    // per-element bounds-checked access as in a manual `0..256` loop.
    x.fill(21.0);
}
33+
2534// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
2635// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
2736// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -36,8 +45,9 @@ fn main() {
3645// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
3746// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
3847
39- // CHECK: Function Attrs: nounwind
4048// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
49+ // CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
50+ // CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr
4151
4252// CHECK: define{{( dso_local)?}} void @main()
4353// CHECK-NEXT: start:
@@ -94,17 +104,3 @@ fn main() {
94104// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
95105// CHECK-NEXT: ret void
96106// CHECK-NEXT: }
97-
// Host-side wrapper: its whole body is one call to the `core::intrinsics::offload` intrinsic,
// passing `_kernel_1`, the constant triples `[256, 1, 1]` and `[32, 1, 1]`, and the
// one-element tuple `(x,)`.
// NOTE(review): these lines carry the `-` diff marker; an identical definition appears on the
// `+` side earlier in this file, so this looks like the pre-move copy -- confirm.
98- #[ unsafe( no_mangle) ]
99- #[ inline( never) ]
100- pub fn kernel_1 ( x : & mut [ f32 ; 256 ] ) {
101- core:: intrinsics:: offload ( _kernel_1, [ 256 , 1 , 1 ] , [ 32 , 1 , 1 ] , ( x, ) )
102- }
103-
// Kernel body that `kernel_1` passes to `core::intrinsics::offload`: writes `21.0` into each
// of the 256 elements of `x`, indexing from 0 up to (not including) 256.
// NOTE(review): these lines carry the `-` diff marker; an identical definition appears on the
// `+` side earlier in this file, so this looks like the pre-move copy -- confirm.
104- #[ unsafe( no_mangle) ]
105- #[ inline( never) ]
106- pub fn _kernel_1 ( x : & mut [ f32 ; 256 ] ) {
107- for i in 0 ..256 {
108- x[ i] = 21.0 ;
109- }
110- }
0 commit comments