[SPIR-V] Initial support for SPIR-V in gpuintrin.h#174910
Conversation
|
@llvm/pr-subscribers-backend-x86 Author: Joseph Huber (jhuber6) — Changes. Summary: This should be the first step towards allowing SPIR-V to build things. Would appreciate someone more familiar with the backend double-checking these. Full diff: https://github.com/llvm/llvm-project/pull/174910.diff — 4 Files Affected:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 1b96ac417bf70..c92b370b88d2d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -297,6 +297,7 @@ set(gpu_files
gpuintrin.h
nvptxintrin.h
amdgpuintrin.h
+ spirvintrin.h
)
set(windows_only_files
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 7afc82413996b..30f3667adea73 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -60,6 +60,8 @@ _Pragma("omp end declare target");
#include <nvptxintrin.h>
#elif defined(__AMDGPU__)
#include <amdgpuintrin.h>
+#elif defined(__SPIRV__)
+#include <spirvintrin.h>
#elif !defined(_OPENMP)
#error "This header is only meant to be used on GPU architectures."
#endif
diff --git a/clang/lib/Headers/spirvintrin.h b/clang/lib/Headers/spirvintrin.h
new file mode 100644
index 0000000000000..bf5df70583dc6
--- /dev/null
+++ b/clang/lib/Headers/spirvintrin.h
@@ -0,0 +1,171 @@
+//===-- spirvintrin.h - SPIR-V intrinsic functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SPIRVINTRIN_H
+#define __SPIRVINTRIN_H
+
+#ifndef __SPIRV__
+#error "This file is intended for SPIR-V targets or offloading to SPIR-V"
+#endif
+
+#ifndef __GPUINTRIN_H
+#error "Never use <spirvintrin.h> directly; include <gpuintrin.h> instead"
+#endif
+
+_Pragma("omp begin declare target device_type(nohost)");
+_Pragma("omp begin declare variant match(device = {arch(spirv64)})");
+
+// Type aliases to the address spaces used by the SPIR-V backend.
+#define __gpu_private __attribute__((address_space(0)))
+#define __gpu_constant __attribute__((address_space(2)))
+#define __gpu_local __attribute__((address_space(3)))
+#define __gpu_global __attribute__((address_space(1)))
+#define __gpu_generic __attribute__((address_space(4)))
+
+// Attribute to declare a function as a kernel.
+#define __gpu_kernel __attribute__((device_kernel, visibility("protected")))
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
+ return __builtin_spirv_num_workgroups(0);
+}
+
+// Returns the number of workgroups in the 'y' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) {
+ return __builtin_spirv_num_workgroups(1);
+}
+
+// Returns the number of workgroups in the 'z' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) {
+ return __builtin_spirv_num_workgroups(2);
+}
+
+// Returns the 'x' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) {
+ return __builtin_spirv_workgroup_id(0);
+}
+
+// Returns the 'y' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) {
+ return __builtin_spirv_workgroup_id(1);
+}
+
+// Returns the 'z' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) {
+ return __builtin_spirv_workgroup_id(2);
+}
+
+// Returns the number of workitems in the 'x' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) {
+ return __builtin_spirv_workgroup_size(0);
+}
+
+// Returns the number of workitems in the 'y' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) {
+ return __builtin_spirv_workgroup_size(1);
+}
+
+// Returns the number of workitems in the 'z' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) {
+ return __builtin_spirv_workgroup_size(2);
+}
+
+// Returns the 'x' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) {
+ return __builtin_spirv_local_invocation_id(0);
+}
+
+// Returns the 'y' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) {
+ return __builtin_spirv_local_invocation_id(1);
+}
+
+// Returns the 'z' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) {
+ return __builtin_spirv_local_invocation_id(2);
+}
+
+// Returns the size of a wavefront, either 32 or 64 depending on hardware
+// and compilation options.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) {
+ return __builtin_spirv_subgroup_size();
+}
+
+// Returns the id of the thread inside of a wavefront executing together.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) {
+ return __builtin_spirv_subgroup_id();
+}
+
+// Returns the bit-mask of active threads in the current wavefront. This
+// implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) {
+ uint32_t [[clang::ext_vector_type(4)]] __mask =
+ __builtin_spirv_subgroup_ballot(1);
+ return __builtin_bit_cast(uint64_t,
+ __builtin_shufflevector(__mask, __mask, 0, 1));
+}
+
+// Copies the value from the first active thread in the wavefront to the rest.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t
+__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __builtin_spirv_subgroup_shuffle(__x,
+ __builtin_ctzg(__gpu_lane_mask()));
+}
+
+// Returns a bitmask of threads in the current wavefront for which \p x is
+// true. This implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask,
+ bool __x) {
+ // The lane_mask & gives the nvptx semantics when lane_mask is a subset of
+ // the active threads.
+ uint32_t [[clang::ext_vector_type(4)]] __mask =
+ __builtin_spirv_subgroup_ballot(__x);
+ return __lane_mask & __builtin_bit_cast(uint64_t, __builtin_shufflevector(
+ __mask, __mask, 0, 1));
+}
+
+// Wait for all threads in the wavefront to converge, this is a noop on SPIR-V.
+_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
+}
+
+// Shuffles the lanes inside the wavefront according to the given index.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t
+__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
+ uint32_t __width) {
+ uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1));
+ return __builtin_spirv_subgroup_shuffle(__x, __lane);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __gpu_match_any_u32_impl(__lane_mask, __x);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+ return __gpu_match_any_u64_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __gpu_match_all_u32_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+ return __gpu_match_all_u64_impl(__lane_mask, __x);
+}
+
+_Pragma("omp end declare variant");
+_Pragma("omp end declare target");
+
+#endif // __SPIRVINTRIN_H
diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c
index 653f87aea2ce3..e3db72d5ff928 100644
--- a/clang/test/Headers/gpuintrin_lang.c
+++ b/clang/test/Headers/gpuintrin_lang.c
@@ -22,6 +22,11 @@
// RUN: -fopenmp-is-target-device -triple amdgcn -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefix=OPENMP
//
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -DSYCL \
+// RUN: -internal-isystem %S/../../lib/Headers/ -fsycl-is-device \
+// RUN: -x c++ -triple spirv64 -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefix=SYCL
+//
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
// RUN: -std=c89 -internal-isystem %S/../../lib/Headers/ \
// RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \
@@ -32,11 +37,13 @@
#ifdef __device__
__device__ int foo() { return __gpu_thread_id_x(); }
+#elif defined(SYCL)
+extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); }
#else
// CUDA-LABEL: define dso_local i32 @foo(
// CUDA-SAME: ) #[[ATTR0:[0-9]+]] {
// CUDA-NEXT: [[ENTRY:.*:]]
-// CUDA-NEXT: [[TMP0:%.*]] = call {{.*}}i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CUDA-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CUDA-NEXT: ret i32 [[TMP0]]
//
// HIP-LABEL: define dso_local i32 @foo(
@@ -61,6 +68,17 @@ __device__ int foo() { return __gpu_thread_id_x(); }
// OPENMP-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
// OPENMP-NEXT: ret i32 [[TMP0]]
//
+// SYCL-LABEL: define spir_func i32 @foo(
+// SYCL-SAME: ) #[[ATTR0:[0-9]+]] {
+// SYCL-NEXT: [[ENTRY:.*:]]
+// SYCL-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4
+// SYCL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SYCL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
+// SYCL-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr [[RETVAL_I]] to ptr addrspace(4)
+// SYCL-NEXT: [[SPV_THREAD_ID_IN_GROUP_I:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 0)
+// SYCL-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP_I]] to i32
+// SYCL-NEXT: ret i32 [[CONV_I]]
+//
// C89-LABEL: define dso_local i32 @foo(
// C89-SAME: ) #[[ATTR0:[0-9]+]] {
// C89-NEXT: [[ENTRY:.*:]]
|
|
@llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) — Changes. Summary: This should be the first step towards allowing SPIR-V to build things. Would appreciate someone more familiar with the backend double-checking these. Full diff: https://github.com/llvm/llvm-project/pull/174910.diff — 4 Files Affected:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 1b96ac417bf70..c92b370b88d2d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -297,6 +297,7 @@ set(gpu_files
gpuintrin.h
nvptxintrin.h
amdgpuintrin.h
+ spirvintrin.h
)
set(windows_only_files
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 7afc82413996b..30f3667adea73 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -60,6 +60,8 @@ _Pragma("omp end declare target");
#include <nvptxintrin.h>
#elif defined(__AMDGPU__)
#include <amdgpuintrin.h>
+#elif defined(__SPIRV__)
+#include <spirvintrin.h>
#elif !defined(_OPENMP)
#error "This header is only meant to be used on GPU architectures."
#endif
diff --git a/clang/lib/Headers/spirvintrin.h b/clang/lib/Headers/spirvintrin.h
new file mode 100644
index 0000000000000..bf5df70583dc6
--- /dev/null
+++ b/clang/lib/Headers/spirvintrin.h
@@ -0,0 +1,171 @@
+//===-- spirvintrin.h - SPIR-V intrinsic functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SPIRVINTRIN_H
+#define __SPIRVINTRIN_H
+
+#ifndef __SPIRV__
+#error "This file is intended for SPIR-V targets or offloading to SPIR-V"
+#endif
+
+#ifndef __GPUINTRIN_H
+#error "Never use <spirvintrin.h> directly; include <gpuintrin.h> instead"
+#endif
+
+_Pragma("omp begin declare target device_type(nohost)");
+_Pragma("omp begin declare variant match(device = {arch(spirv64)})");
+
+// Type aliases to the address spaces used by the SPIR-V backend.
+#define __gpu_private __attribute__((address_space(0)))
+#define __gpu_constant __attribute__((address_space(2)))
+#define __gpu_local __attribute__((address_space(3)))
+#define __gpu_global __attribute__((address_space(1)))
+#define __gpu_generic __attribute__((address_space(4)))
+
+// Attribute to declare a function as a kernel.
+#define __gpu_kernel __attribute__((device_kernel, visibility("protected")))
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
+ return __builtin_spirv_num_workgroups(0);
+}
+
+// Returns the number of workgroups in the 'y' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) {
+ return __builtin_spirv_num_workgroups(1);
+}
+
+// Returns the number of workgroups in the 'z' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) {
+ return __builtin_spirv_num_workgroups(2);
+}
+
+// Returns the 'x' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) {
+ return __builtin_spirv_workgroup_id(0);
+}
+
+// Returns the 'y' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) {
+ return __builtin_spirv_workgroup_id(1);
+}
+
+// Returns the 'z' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) {
+ return __builtin_spirv_workgroup_id(2);
+}
+
+// Returns the number of workitems in the 'x' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) {
+ return __builtin_spirv_workgroup_size(0);
+}
+
+// Returns the number of workitems in the 'y' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) {
+ return __builtin_spirv_workgroup_size(1);
+}
+
+// Returns the number of workitems in the 'z' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) {
+ return __builtin_spirv_workgroup_size(2);
+}
+
+// Returns the 'x' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) {
+ return __builtin_spirv_local_invocation_id(0);
+}
+
+// Returns the 'y' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) {
+ return __builtin_spirv_local_invocation_id(1);
+}
+
+// Returns the 'z' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) {
+ return __builtin_spirv_local_invocation_id(2);
+}
+
+// Returns the size of a wavefront, either 32 or 64 depending on hardware
+// and compilation options.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) {
+ return __builtin_spirv_subgroup_size();
+}
+
+// Returns the id of the thread inside of a wavefront executing together.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) {
+ return __builtin_spirv_subgroup_id();
+}
+
+// Returns the bit-mask of active threads in the current wavefront. This
+// implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) {
+ uint32_t [[clang::ext_vector_type(4)]] __mask =
+ __builtin_spirv_subgroup_ballot(1);
+ return __builtin_bit_cast(uint64_t,
+ __builtin_shufflevector(__mask, __mask, 0, 1));
+}
+
+// Copies the value from the first active thread in the wavefront to the rest.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t
+__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __builtin_spirv_subgroup_shuffle(__x,
+ __builtin_ctzg(__gpu_lane_mask()));
+}
+
+// Returns a bitmask of threads in the current wavefront for which \p x is
+// true. This implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask,
+ bool __x) {
+ // The lane_mask & gives the nvptx semantics when lane_mask is a subset of
+ // the active threads.
+ uint32_t [[clang::ext_vector_type(4)]] __mask =
+ __builtin_spirv_subgroup_ballot(__x);
+ return __lane_mask & __builtin_bit_cast(uint64_t, __builtin_shufflevector(
+ __mask, __mask, 0, 1));
+}
+
+// Wait for all threads in the wavefront to converge, this is a noop on SPIR-V.
+_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
+}
+
+// Shuffles the lanes inside the wavefront according to the given index.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t
+__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
+ uint32_t __width) {
+ uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1));
+ return __builtin_spirv_subgroup_shuffle(__x, __lane);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __gpu_match_any_u32_impl(__lane_mask, __x);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+ return __gpu_match_any_u64_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+ return __gpu_match_all_u32_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+ return __gpu_match_all_u64_impl(__lane_mask, __x);
+}
+
+_Pragma("omp end declare variant");
+_Pragma("omp end declare target");
+
+#endif // __SPIRVINTRIN_H
diff --git a/clang/test/Headers/gpuintrin_lang.c b/clang/test/Headers/gpuintrin_lang.c
index 653f87aea2ce3..e3db72d5ff928 100644
--- a/clang/test/Headers/gpuintrin_lang.c
+++ b/clang/test/Headers/gpuintrin_lang.c
@@ -22,6 +22,11 @@
// RUN: -fopenmp-is-target-device -triple amdgcn -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefix=OPENMP
//
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -DSYCL \
+// RUN: -internal-isystem %S/../../lib/Headers/ -fsycl-is-device \
+// RUN: -x c++ -triple spirv64 -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefix=SYCL
+//
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
// RUN: -std=c89 -internal-isystem %S/../../lib/Headers/ \
// RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \
@@ -32,11 +37,13 @@
#ifdef __device__
__device__ int foo() { return __gpu_thread_id_x(); }
+#elif defined(SYCL)
+extern "C" [[clang::sycl_external]] int foo() { return __gpu_thread_id_x(); }
#else
// CUDA-LABEL: define dso_local i32 @foo(
// CUDA-SAME: ) #[[ATTR0:[0-9]+]] {
// CUDA-NEXT: [[ENTRY:.*:]]
-// CUDA-NEXT: [[TMP0:%.*]] = call {{.*}}i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CUDA-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CUDA-NEXT: ret i32 [[TMP0]]
//
// HIP-LABEL: define dso_local i32 @foo(
@@ -61,6 +68,17 @@ __device__ int foo() { return __gpu_thread_id_x(); }
// OPENMP-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
// OPENMP-NEXT: ret i32 [[TMP0]]
//
+// SYCL-LABEL: define spir_func i32 @foo(
+// SYCL-SAME: ) #[[ATTR0:[0-9]+]] {
+// SYCL-NEXT: [[ENTRY:.*:]]
+// SYCL-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4
+// SYCL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SYCL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
+// SYCL-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr [[RETVAL_I]] to ptr addrspace(4)
+// SYCL-NEXT: [[SPV_THREAD_ID_IN_GROUP_I:%.*]] = call i64 @llvm.spv.thread.id.in.group.i64(i32 0)
+// SYCL-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPV_THREAD_ID_IN_GROUP_I]] to i32
+// SYCL-NEXT: ret i32 [[CONV_I]]
+//
// C89-LABEL: define dso_local i32 @foo(
// C89-SAME: ) #[[ATTR0:[0-9]+]] {
// C89-NEXT: [[ENTRY:.*:]]
|
|
This will fail tests until the dependent PRs are merged. Inspecting the basic IR makes sense but I have no way to test this. Hopefully @sarnex can help here in the future because this should make porting the OpenMP support much easier. The SPIR-V intrinsics are missing thread syncs, an exit, and the pointer introspections. No clue if I got the address spaces or the thread -> grid accessors right. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
sarnex
left a comment
There was a problem hiding this comment.
lgtm, but i asked greg from my team to also take a look at this since he's more familiar with what the correct logic should be
| #define __gpu_generic __attribute__((address_space(4))) | ||
|
|
||
| // Attribute to declare a function as a kernel. | ||
| #define __gpu_kernel __attribute__((device_kernel, visibility("protected"))) |
There was a problem hiding this comment.
maybe we could unify all these and move it to gpuintrin.h and remove it from each target's header since i unified the attrs a while ago?
There was a problem hiding this comment.
Yeah, I'll do a pass to simplify that in the future since it applies to the libc code as well.
|
Couple of questions:
|
Yes, these aren't intended to be a completely inclusive set. I'm working on exposing
I don't know exactly how SPIR-V works. It seems that some things are resolved as external functions and hooked up by some Khronos tool? I'd prefer if we moved away from that now that we have a backend. Correct me if I'm wrong here. The proper way of doing this is always builtins to LLVM backend intrinsics, everything else is more of a temporary hack as far as I'm aware. The |
7a4076d to
b4ba797
Compare
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/33957 Here is the relevant piece of the build log for the reference |
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary:
#174862 and
#174655 provided the intrinsics
required to get the fundamental operations working for these. This patch
sets up the basic support (as far as I know).
This should be the first step towards allowing SPIR-V to build things
like the LLVM libc and the OpenMP Device Runtime Library. The
implementations here are intentionally inefficient, such as not using
the dedicated SPIR-V opcode for read firstlane. This is just to start
and hopefully start testing things later.
Would appreciate someone more familiar with the backend double-checking
these.