Skip to content

Commit fe0529a

Browse files
committed
Fixed remaining nvc++ warnings
1 parent 2f9f075 commit fe0529a

10 files changed

Lines changed: 23 additions & 76 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ MatX support is currently limited to **Linux only** due to the time to test Wind
5252

5353
**Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.5-11.8 or CUDA 12.2.1+ with MatX.
5454

55-
MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++9 or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
55+
MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++9, nvc++ 24.5, or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
5656

5757
MatX has been tested on and supports Pascal, Turing, Volta, Ampere, Ada, and Hopper GPU architectures. Jetson products are supported with Jetpack 5.0 or above.
5858

bench/00_operators/operators.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ NVBENCH_BENCH_TYPES(random, NVBENCH_TYPE_AXES(random_types));
8787
template<typename T> T factorial(int N) {
8888
T prod = 1;
8989
for(int i=2; i<=N; i++) {
90-
prod = prod * i;
90+
prod = prod * static_cast<T>(i);
9191
}
9292
return prod;
9393
}
@@ -99,7 +99,7 @@ void sphericalharmonics(nvbench::state &state, nvbench::type_list<ValueType>)
9999
int l = 5;
100100
int m = 4;
101101
int n = 600;
102-
ValueType dx = M_PI/n;
102+
ValueType dx = static_cast<ValueType>(M_PI/n);
103103

104104
cudaExecutor exec{};
105105
auto col = range<0>({n+1},ValueType(0), ValueType(dx));
@@ -109,11 +109,11 @@ void sphericalharmonics(nvbench::state &state, nvbench::type_list<ValueType>)
109109

110110
auto Plm = lcollapse<3>(legendre(l, m, cos(theta)));
111111

112-
ValueType a = (2*l+1)*factorial<ValueType>(l-m);
113-
ValueType b = 4*M_PI*factorial<ValueType>(l+m);
112+
ValueType a = static_cast<ValueType>(2*l+1)*factorial<ValueType>(l-m);
113+
ValueType b = static_cast<ValueType>(4*M_PI)*factorial<ValueType>(l+m);
114114
ValueType C = cuda::std::sqrt(a/b);
115115

116-
auto Ylm = C * Plm * exp(cuda::std::complex<ValueType>(0,1)*(m*phi));
116+
auto Ylm = C * Plm * exp(cuda::std::complex<ValueType>(0,1)*(static_cast<ValueType>(m)*phi));
117117
auto [ Xm, Ym, Zm ] = sph2cart(phi, ValueType(M_PI)/2 - theta, abs(real(Ylm)));
118118

119119
// Work around C++17 restriction, structured bindings cannot be captured

bench/00_transform/conv.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ void conv2d_direct_batch(nvbench::state &state,
130130
flops.set_string("description", "Trillions of operations per second");
131131

132132
if constexpr (is_complex_v<ValueType>) {
133-
flops.set_float64("value", (double)2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) * 4 / seconds / 1e12);
133+
flops.set_float64("value", static_cast<double>(2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) * 4) / seconds / 1e12);
134134
} else {
135-
flops.set_float64("value", (double)2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) / seconds / 1e12);
135+
flops.set_float64("value", static_cast<double>(2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1)) / seconds / 1e12);
136136
}
137137
}
138138
NVBENCH_BENCH_TYPES(conv2d_direct_batch, NVBENCH_TYPE_AXES(conv_types));

bench/00_transform/qr.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ void qr_batch(nvbench::state &state,
1919
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
2020
cudaExecutor exec{stream};
2121

22-
int batch = state.get_int64("batch");
23-
int m = state.get_int64("rows");
24-
int n = state.get_int64("cols");
22+
int64_t batch = state.get_int64("batch");
23+
int64_t m = state.get_int64("rows");
24+
int64_t n = state.get_int64("cols");
2525

2626
auto A = make_tensor<AType>({batch, m, n});
2727
auto Q = make_tensor<AType>({batch, m, m});

bench/00_transform/svd_power.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ void svdpi_batch(nvbench::state &state,
1919
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
2020
cudaExecutor exec{stream};
2121

22-
int batch = state.get_int64("batch");
23-
int m = state.get_int64("rows");
24-
int n = state.get_int64("cols");
22+
int64_t batch = state.get_int64("batch");
23+
int64_t m = state.get_int64("rows");
24+
int64_t n = state.get_int64("cols");
2525

26-
int r = std::min(n,m);
26+
int64_t r = std::min(n,m);
2727
auto A = make_tensor<AType>({batch, m, n});
2828
auto U = make_tensor<AType>({batch, m, r});
2929
auto VT = make_tensor<AType>({batch, r, n});
@@ -68,11 +68,11 @@ void svdbpi_batch(nvbench::state &state,
6868
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
6969
cudaExecutor exec{stream};
7070

71-
int batch = state.get_int64("batch");
72-
int m = state.get_int64("rows");
73-
int n = state.get_int64("cols");
71+
int64_t batch = state.get_int64("batch");
72+
int64_t m = state.get_int64("rows");
73+
int64_t n = state.get_int64("cols");
7474

75-
int r = std::min(n,m);
75+
int64_t r = std::min(n,m);
7676
auto A = make_tensor<AType>({batch, m, n});
7777
auto U = make_tensor<AType>({batch, m, r});
7878
auto VT = make_tensor<AType>({batch, r, n});

docs_input/build.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ the CPM_ documentation or the documentation for each package for more informatio
1919

2020
System Requirements
2121
-------------------
22-
MatX requires **CUDA 11.5** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. See the CUDA toolkit documentation
22+
MatX requires **CUDA 11.5** or higher, and **g++ 9.3+**, **clang 17+**, or **nvc++ 24.5** for the host compiler. See the CUDA toolkit documentation
2323
for supported host compilers. Other requirements for optional components are listed below.
2424

2525
.. warning:: Using MatX with an unsupported compiler may result in compiler and/or runtime errors.

include/matx/generators/range.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,6 @@ namespace matx
6060
else {
6161
return first_ + T(static_cast<T>(idx) * step_);
6262
}
63-
64-
if constexpr (!is_matx_half_v<T>) {
65-
return first_ + T(static_cast<T>(idx) * step_);
66-
}
67-
else {
68-
return first_ + T(static_cast<T>((float)idx) * step_);
69-
}
7063
}
7164
};
7265
}

include/matx/operators/repmat.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -105,15 +105,6 @@ namespace matx
105105
UpdateIndex(tup);
106106
return cuda::std::apply(op_, tup);
107107
}
108-
109-
if constexpr (Rank() != 0) {
110-
auto tup = cuda::std::make_tuple(indices...);
111-
UpdateIndex(tup);
112-
return cuda::std::apply(op_, tup);
113-
}
114-
else {
115-
return op_();
116-
}
117108
}
118109

119110
template <typename... Is>
@@ -127,15 +118,6 @@ namespace matx
127118
UpdateIndex(tup);
128119
return cuda::std::apply(op_, tup);
129120
}
130-
131-
if constexpr (Rank() != 0) {
132-
auto tup = cuda::std::make_tuple(indices...);
133-
UpdateIndex(tup);
134-
return cuda::std::apply(op_, tup);
135-
}
136-
else {
137-
return op_();
138-
}
139121
}
140122

141123
template <typename ShapeType, typename Executor>

include/matx/operators/reverse.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,6 @@ namespace matx
7474
cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
7575
return cuda::std::apply(op_, tup);
7676
}
77-
78-
if constexpr (Rank() != 0) {
79-
auto tup = cuda::std::make_tuple(indices...);
80-
cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
81-
return cuda::std::apply(op_, tup);
82-
}
83-
else {
84-
return op_();
85-
}
8677
}
8778

8879
template <typename... Is>
@@ -96,15 +87,6 @@ namespace matx
9687
cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
9788
return cuda::std::apply(op_, tup);
9889
}
99-
100-
if constexpr (Rank() != 0) {
101-
auto tup = cuda::std::make_tuple(indices...);
102-
cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
103-
return cuda::std::apply(op_, tup);
104-
}
105-
else {
106-
return op_();
107-
}
10890
}
10991

11092
static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()

include/matx/operators/scalar_ops.h

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -496,10 +496,7 @@ template <typename T1, typename T2> struct FModF {
496496
static std::string str(const std::string &str1, const std::string &str2) { return "(" + str1 + "%" + str2 + ")"; }
497497

498498
static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto op(T1 v1, T2 v2) {
499-
return _internal_fmod(v1, v2);
500-
501-
// Unreachable, but required by the compiler
502-
return typename std::invoke_result_t<decltype(op), T1, T2>{0};
499+
return _internal_fmod(v1, v2);
503500
}
504501
};
505502
template <typename T1, typename T2> using FModOp = BinOp<T1, T2, FModF<T1, T2>>;
@@ -520,10 +517,7 @@ template <typename T1, typename T2> struct Atan2F {
520517
static std::string str(const std::string &str1, const std::string &str2) { return "(" + str1 + "%" + str2 + ")"; }
521518

522519
static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto op(T1 v1, T2 v2) {
523-
return _internal_atan2(v1, v2);
524-
525-
// Unreachable, but required by the compiler
526-
return typename std::invoke_result_t<decltype(op), T1, T2>{0};
520+
return _internal_atan2(v1, v2);
527521
}
528522
};
529523
template <typename T1, typename T2> using Atan2Op = BinOp<T1, T2, Atan2F<T1, T2>>;
@@ -649,8 +643,6 @@ static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto _internal_isnan(T v1)
649643
} else {
650644
return cuda::std::isnan(static_cast<castType>(v1));
651645
}
652-
653-
return false;
654646
}
655647
template <typename T>
656648
struct IsNan {
@@ -675,9 +667,7 @@ static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto _internal_isinf(T v1)
675667
return cuda::std::isinf(static_cast<typename castType::value_type>(v1.real())) || cuda::std::isinf(static_cast<typename castType::value_type>(v1.imag()));
676668
} else {
677669
return cuda::std::isinf(static_cast<castType>(v1));
678-
}
679-
680-
return false;
670+
}
681671
}
682672
template <typename T>
683673
struct IsInf {

0 commit comments

Comments (0)