Skip to content

Commit 93f3edf

Browse files
authored
Unrolled build for #150511
Rollup merge of #150511 - Sa4dUs:offload-inline, r=ZuseZ4. Allow inline calls to offload intrinsic. Removes the explicit insertion-point save/restore handling and instead repositions the builder at the end of the saved basic block. r? `@ZuseZ4`. Fixes: #150413
2 parents 2848c2e + 41a24c4 commit 93f3edf

File tree

4 files changed

+39
-37
lines changed

4 files changed

+39
-37
lines changed

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
430430
let fn_ty = offload_globals.mapper_fn_ty;
431431

432432
let num_args = types.len() as u64;
433-
let ip = unsafe { llvm::LLVMRustGetInsertPoint(&builder.llbuilder) };
433+
let bb = builder.llbb();
434434

435435
// FIXME(Sa4dUs): dummy loads are a temp workaround, we should find a proper way to prevent these
436436
// variables from being optimized away
@@ -468,7 +468,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
468468

469469
// Step 1)
470470
unsafe {
471-
llvm::LLVMRustRestoreInsertPoint(&builder.llbuilder, ip);
471+
llvm::LLVMPositionBuilderAtEnd(&builder.llbuilder, bb);
472472
}
473473
builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);
474474

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2443,8 +2443,6 @@ unsafe extern "C" {
24432443

24442444
pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value);
24452445
pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
2446-
pub(crate) fn LLVMRustGetInsertPoint<'a>(B: &Builder<'a>) -> &'a Value;
2447-
pub(crate) fn LLVMRustRestoreInsertPoint<'a>(B: &Builder<'a>, IP: &'a Value);
24482446

24492447
pub(crate) fn LLVMRustSetModulePICLevel(M: &Module);
24502448
pub(crate) fn LLVMRustSetModulePIELevel(M: &Module);

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,39 +1458,6 @@ extern "C" void LLVMRustPositionAfter(LLVMBuilderRef B, LLVMValueRef Instr) {
14581458
}
14591459
}
14601460

1461-
extern "C" LLVMValueRef LLVMRustGetInsertPoint(LLVMBuilderRef B) {
1462-
llvm::IRBuilderBase &IRB = *unwrap(B);
1463-
1464-
llvm::IRBuilderBase::InsertPoint ip = IRB.saveIP();
1465-
llvm::BasicBlock *BB = ip.getBlock();
1466-
1467-
if (!BB)
1468-
return nullptr;
1469-
1470-
auto it = ip.getPoint();
1471-
1472-
if (it == BB->end())
1473-
return nullptr;
1474-
1475-
llvm::Instruction *I = &*it;
1476-
return wrap(I);
1477-
}
1478-
1479-
extern "C" void LLVMRustRestoreInsertPoint(LLVMBuilderRef B,
1480-
LLVMValueRef Instr) {
1481-
llvm::IRBuilderBase &IRB = *unwrap(B);
1482-
1483-
if (!Instr) {
1484-
llvm::BasicBlock *BB = IRB.GetInsertBlock();
1485-
if (BB)
1486-
IRB.SetInsertPoint(BB);
1487-
return;
1488-
}
1489-
1490-
llvm::Instruction *I = unwrap<llvm::Instruction>(Instr);
1491-
IRB.SetInsertPoint(I);
1492-
}
1493-
14941461
extern "C" LLVMValueRef
14951462
LLVMRustGetFunctionCall(LLVMValueRef Fn, const char *Name, size_t NameLen) {
14961463
auto targetName = StringRef(Name, NameLen);
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
2+
//@ no-prefer-dynamic
3+
//@ needs-offload
4+
5+
// This test verifies that the offload intrinsic is correctly lowered even when the caller
6+
// contains control flow.
7+
8+
#![feature(abi_gpu_kernel)]
9+
#![feature(rustc_attrs)]
10+
#![feature(core_intrinsics)]
11+
#![no_main]
12+
13+
// CHECK: define{{( dso_local)?}} void @main()
14+
// CHECK-NOT: define
15+
// CHECK: %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8
16+
// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
17+
// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
18+
// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
19+
// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
20+
// CHECK: br label %bb3
21+
// CHECK-NOT: define
22+
// CHECK: bb3
23+
// CHECK: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
24+
// CHECK: %10 = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2097152, i32 256, ptr nonnull @.foo.region_id, ptr nonnull %kernel_args)
25+
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
26+
#[unsafe(no_mangle)]
27+
unsafe fn main() {
28+
let A = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
29+
30+
for i in 0..100 {
31+
core::intrinsics::offload::<_, _, ()>(foo, (A.as_ptr() as *const [f32; 6],));
32+
}
33+
}
34+
35+
unsafe extern "C" {
36+
pub fn foo(A: *const [f32; 6]) -> ();
37+
}

0 commit comments

Comments
 (0)