Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ set(BUILD_UNIT_TEST_PC false CACHE BOOL "Do you want to build the primordial che
set(BUILD_UNIT_TEST_MC false CACHE BOOL "Do you want to build the metal chem unit test? (true/false)")

add_compile_options(-Werror -Wall -Wextra)
add_compile_definitions(NET_LOOP_UNROLL_LEN=1)

#setting sourcefiles and directories needed to make the test here
#so that they are accessible to codes using
Expand Down
6 changes: 6 additions & 0 deletions Make.Microphysics_extern
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ ifeq ($(USE_NONAKA_PLOT),TRUE)
DEFINES += -DNONAKA_PLOT
endif

# sometimes we benefit from loop unrolling. This specifies
# the size of the unroll
NET_LOOP_UNROLL_LEN ?= 4
DEFINES += -DNET_LOOP_UNROLL_LEN=$(NET_LOOP_UNROLL_LEN)


SCREEN_METHOD ?= screen5
ifeq ($(SCREEN_METHOD), null)
DEFINES += -DSCREEN_METHOD=SCREEN_METHOD_null
Expand Down
22 changes: 14 additions & 8 deletions util/linpack.H
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@

template <int num_eqs, bool allow_pivot>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void dgesl (RArray2D& a, IArray1D& pivot, RArray1D& b)
void dgesl (const RArray2D& a, const IArray1D& pivot, RArray1D& b)
{

int nm1 = num_eqs - 1;
constexpr int nm1 = num_eqs - 1;

// solve a * x = b
// first solve l * y = b
if (nm1 >= 1) {
if constexpr (nm1 >= 1) {
for (int k = 1; k <= nm1; ++k) {

amrex::Real t{};
Expand All @@ -30,6 +30,7 @@ void dgesl (RArray2D& a, IArray1D& pivot, RArray1D& b)
t = b(k);
}

AMREX_UNROLL_LOOP(NET_LOOP_UNROLL_LEN)
for (int j = k+1; j <= num_eqs; ++j) {
b(j) += t * a(j,k);
}
Expand All @@ -39,9 +40,10 @@ void dgesl (RArray2D& a, IArray1D& pivot, RArray1D& b)
// now solve u * x = y
for (int kb = 1; kb <= num_eqs; ++kb) {

int k = num_eqs + 1 - kb;
const int k = num_eqs + 1 - kb;
b(k) = b(k) / a(k,k);
amrex::Real t = -b(k);
AMREX_UNROLL_LOOP(NET_LOOP_UNROLL_LEN)
for (int j = 1; j <= k-1; ++j) {
b(j) += t * a(j,k);
}
Expand All @@ -64,11 +66,11 @@ void dgefa (RArray2D& a, IArray1D& pivot, int& info)
// gaussian elimination with partial pivoting

info = 0;
int nm1 = num_eqs - 1;
constexpr int nm1 = num_eqs - 1;

amrex::Real t;

if (nm1 >= 1) {
if constexpr (nm1 >= 1) {

for (int k = 1; k <= nm1; ++k) {

Expand All @@ -77,10 +79,12 @@ void dgefa (RArray2D& a, IArray1D& pivot, int& info)

if constexpr (allow_pivot) {
amrex::Real dmax = std::abs(a(k,k));
AMREX_UNROLL_LOOP(NET_LOOP_UNROLL_LEN)
for (int i = k+1; i <= num_eqs; ++i) {
if (std::abs(a(i,k)) > dmax) {
amrex::Real ai = std::abs(a(i, k));
if (ai > dmax) {
l = i;
dmax = std::abs(a(i,k));
dmax = ai;
}
}

Expand All @@ -101,6 +105,7 @@ void dgefa (RArray2D& a, IArray1D& pivot, int& info)

// compute multipliers
t = -1.0e0_rt / a(k,k);
AMREX_UNROLL_LOOP(NET_LOOP_UNROLL_LEN)
for (int j = k+1; j <= num_eqs; ++j) {
a(j,k) *= t;
}
Expand All @@ -116,6 +121,7 @@ void dgefa (RArray2D& a, IArray1D& pivot, int& info)
}
}

AMREX_UNROLL_LOOP(NET_LOOP_UNROLL_LEN)
for (int i = k+1; i <= num_eqs; ++i) {
a(i,j) += t * a(i,k);
}
Expand Down
Loading