Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions halide/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Ignore rules for the halide/ benchmark tree: every pattern below matches
# files by extension only, grouped by the kind of build artifact.

# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app
14 changes: 14 additions & 0 deletions halide/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
cmake_minimum_required(VERSION 3.16)
project(NPBench_Halide LANGUAGES C CXX)

# LIKWID marker API, linked by the benchmark executables as likwid::likwid.
find_package(likwid REQUIRED COMPONENTS marker)

# REQUIRED aborts configuration when OpenMP is missing, so no *_FOUND guard is
# needed afterwards.
find_package(OpenMP REQUIRED)

# Directory-scoped on purpose: the benchmark subdirectories added below inherit
# these flags (e.g. deep_learning/conv2d_bias/process.cpp calls
# omp_get_wtime()). The compiler flags are also passed on the link line, so no
# separate linker-flag variable is required; FindOpenMP does not provide one.
string(APPEND CMAKE_C_FLAGS " ${OpenMP_C_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}")

# Benchmark groups.
add_subdirectory(deep_learning)
add_subdirectory(image_processing)
add_subdirectory(weather_stencils)
5 changes: 5 additions & 0 deletions halide/deep_learning/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 3.16)

# Each deep-learning kernel lives in its own subdirectory with its own
# CMakeLists.txt; they are added in this fixed order.
foreach(kernel_dir IN ITEMS conv2d_bias softmax mlp)
  add_subdirectory(${kernel_dir})
endforeach()
28 changes: 28 additions & 0 deletions halide/deep_learning/conv2d_bias/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.16)
project(conv2d_bias)

# Set up language settings
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS NO)

# Find Halide
find_package(Halide REQUIRED)

# process.cpp links likwid::likwid and calls omp_get_wtime(). Locate both
# dependencies here so this project also configures and links when it is not
# reached through the top-level CMakeLists.txt. The TARGET guards skip the
# lookup when a parent directory has already imported the targets.
if(NOT TARGET likwid::likwid)
  find_package(likwid REQUIRED COMPONENTS marker)
endif()
if(NOT TARGET OpenMP::OpenMP_CXX)
  find_package(OpenMP REQUIRED COMPONENTS CXX)
endif()

# Generator
add_halide_generator(conv2d_bias.generator SOURCES conv2d_bias_generator.cpp)

# Filters: the same generator is compiled twice, once as-is and once with the
# Adams2019 autoscheduler.
add_halide_library(conv2d_bias FROM conv2d_bias.generator)
add_halide_library(conv2d_bias_auto_schedule FROM conv2d_bias.generator
                   GENERATOR conv2d_bias
                   AUTOSCHEDULER Halide::Adams2019)

# Main executable
add_executable(conv2d_bias_process process.cpp)
target_link_libraries(conv2d_bias_process
                      PRIVATE
                      likwid::likwid
                      OpenMP::OpenMP_CXX
                      Halide::ImageIO
                      conv2d_bias
                      conv2d_bias_auto_schedule)
70 changes: 70 additions & 0 deletions halide/deep_learning/conv2d_bias/conv2d_bias_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "Halide.h"

namespace {

using namespace Halide;

// Generator for a batched 2-D convolution plus a per-output-channel bias.
// All buffers are indexed channel-first: input(ci, x, y, n),
// filter(ci, kx, ky, co), bias(co) and output(co, x, y, n).
class Conv2dBias : public Halide::Generator<Conv2dBias> {
public:
    Input<Buffer<float, 4>> input{"input"};
    Input<Buffer<float, 4>> filter{"filter"};
    Input<Buffer<float, 1>> bias{"bias"};
    Output<Buffer<float, 4>> output{"output"};

    void generate() {
        // Problem size the pipeline is specialised for.
        const int N = 8, CI = 3, CO = 16, W = 256, H = 256, K = 20;
        const int border = K - 1;
        // The output shrinks by K - 1 in x and y: no padding is applied.
        const int out_w = W - border;
        const int out_h = H - border;

        /* THE ALGORITHM */

        Var x("x"), y("y"), c("c"), n("n");

        // Reduction over input channel (r.x) and the K x K window (r.y, r.z).
        RDom r(0, CI, 0, K, 0, K);

        output(c, x, y, n) = bias(c);
        output(c, x, y, n) +=
            filter(r.x, r.y, r.z, c) * input(r.x, x + r.y, y + r.z, n);

        /* THE SCHEDULE */

        // Every dimension gets exact bounds, a dense stride (innermost
        // dimension first) and a matching estimate for this specific size.

        input.dim(0).set_bounds(0, CI).set_stride(1).set_estimate(0, CI);
        input.dim(1).set_bounds(0, W).set_stride(CI).set_estimate(0, W);
        input.dim(2).set_bounds(0, H).set_stride(CI * W).set_estimate(0, H);
        input.dim(3).set_bounds(0, N).set_stride(CI * W * H).set_estimate(0, N);

        filter.dim(0).set_bounds(0, CI).set_stride(1).set_estimate(0, CI);
        filter.dim(1).set_bounds(0, K).set_stride(CI).set_estimate(0, K);
        filter.dim(2).set_bounds(0, K).set_stride(CI * K).set_estimate(0, K);
        filter.dim(3).set_bounds(0, CO).set_stride(CI * K * K).set_estimate(0, CO);

        bias.dim(0).set_bounds(0, CO).set_stride(1).set_estimate(0, CO);

        output.dim(0).set_bounds(0, CO).set_stride(1).set_estimate(0, CO);
        output.dim(1).set_bounds(0, out_w).set_stride(CO).set_estimate(0, out_w);
        output.dim(2).set_bounds(0, out_h).set_stride(CO * out_w).set_estimate(0, out_h);
        output.dim(3).set_bounds(0, N).set_stride(CO * out_w * out_h).set_estimate(0, N);
    }
};

}  // namespace

HALIDE_REGISTER_GENERATOR(Conv2dBias, conv2d_bias)
107 changes: 107 additions & 0 deletions halide/deep_learning/conv2d_bias/process.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <vector>

#include <omp.h>

#include "conv2d_bias.h"
#include "conv2d_bias_auto_schedule.h"

#include "HalideBuffer.h"

// When LIKWID_PERFMON is defined the real marker API header is used.
// Otherwise every LIKWID_MARKER_* macro used in this file is defined to
// expand to nothing, so the marker calls compile away.
#ifdef LIKWID_PERFMON
#include <likwid-marker.h>
#else
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#endif

using namespace Halide::Runtime;

// Returns the median of `samples`, or 0.0 when `samples` is empty.
// The vector is taken by value because std::nth_element reorders it.
static double median_of(std::vector<double> samples) {
    if (samples.empty()) {
        return 0.0;
    }
    const auto mid = samples.size() / 2;
    std::nth_element(samples.begin(), samples.begin() + mid, samples.end());
    double med = samples[mid];
    if (samples.size() % 2 == 0) {
        // Even count: average the two middle values. After nth_element the
        // lower middle value is the largest element left of `mid`.
        const auto lower = std::max_element(samples.begin(), samples.begin() + mid);
        med = (*lower + med) / 2.0;
    }
    return med;
}

// Benchmark driver for the autoscheduled conv2d_bias pipeline.
//
// Fills the input, filter and bias buffers with rand() values, runs the
// pipeline 30 times on fresh copies of the buffers and prints the median
// wall-clock time, then runs it once more inside a LIKWID "Compute" marker
// region. Returns 0 on success and 1 if the pipeline or the device sync
// reports a non-zero status code.
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    const int N = 8, CI = 3, CO = 16, W = 256, H = 256, K = 20;
    const int border = K - 1;
    const int timed_runs = 30;

    Buffer<float, 4> input(CI, W, H, N);
    Buffer<float, 4> filter(CI, K, K, CO);
    Buffer<float, 1> bias(CO);
    Buffer<float, 4> output(CO, W - border, H - border, N);

    // Loops are nested so that the first buffer index varies fastest.
    for (int n = 0; n < N; n++) {
        for (int y = 0; y < H; y++) {
            for (int x = 0; x < W; x++) {
                for (int c = 0; c < CI; c++) {
                    input(c, x, y, n) = static_cast<float>(rand());
                }
            }
        }
    }

    for (int co = 0; co < CO; co++) {
        for (int y = 0; y < K; y++) {
            for (int x = 0; x < K; x++) {
                for (int ci = 0; ci < CI; ci++) {
                    filter(ci, x, y, co) = static_cast<float>(rand());
                }
            }
        }
    }

    for (int co = 0; co < CO; co++) {
        bias(co) = static_cast<float>(rand());
    }

    // Timing code

    std::vector<double> runtimes;
    runtimes.reserve(timed_runs);
    for (int i = 0; i < timed_runs; i++) {
        // Fresh copies per run; the copies are made outside the timed region.
        Buffer<float, 4> input_ = input.copy();
        Buffer<float, 4> filter_ = filter.copy();
        Buffer<float, 1> bias_ = bias.copy();
        Buffer<float, 4> output_ = output.copy();

        const double t_start = omp_get_wtime();

        int err = conv2d_bias_auto_schedule(input_, filter_, bias_, output_);
        if (err == 0) {
            err = output_.device_sync();
        }

        const double t_end = omp_get_wtime();

        // A failed run must not be reported as a valid timing.
        if (err != 0) {
            fprintf(stderr, "conv2d_bias_auto_schedule failed with error code %d\n", err);
            return 1;
        }
        runtimes.push_back(t_end - t_start);
    }

    printf("Runtime: %f\n", median_of(runtimes));

    // One extra run inside a LIKWID marker region for counter measurements.
    Buffer<float, 4> input_ = input.copy();
    Buffer<float, 4> filter_ = filter.copy();
    Buffer<float, 1> bias_ = bias.copy();
    Buffer<float, 4> output_ = output.copy();

    LIKWID_MARKER_INIT;
    LIKWID_MARKER_THREADINIT;

    LIKWID_MARKER_START("Compute");

    int err = conv2d_bias_auto_schedule(input_, filter_, bias_, output_);
    if (err == 0) {
        err = output_.device_sync();
    }

    LIKWID_MARKER_STOP("Compute");

    // Close the marker API before the status check so it is closed on the
    // failure path as well.
    LIKWID_MARKER_CLOSE;

    if (err != 0) {
        fprintf(stderr, "conv2d_bias_auto_schedule failed with error code %d\n", err);
        return 1;
    }

    printf("Success!\n");
    return 0;
}
28 changes: 28 additions & 0 deletions halide/deep_learning/mlp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.16)
project(mlp)

# Set up language settings
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS NO)

# Find Halide
find_package(Halide REQUIRED)

# The executable links likwid::likwid, so locate it here instead of relying on
# a parent directory having done so. The TARGET guards skip the lookup when the
# targets were already imported further up the tree.
# NOTE(review): OpenMP is added on the assumption that this process.cpp uses
# omp_get_wtime() like the other benchmark drivers - confirm.
if(NOT TARGET likwid::likwid)
  find_package(likwid REQUIRED COMPONENTS marker)
endif()
if(NOT TARGET OpenMP::OpenMP_CXX)
  find_package(OpenMP REQUIRED COMPONENTS CXX)
endif()

# Generator
add_halide_generator(mlp.generator SOURCES mlp_generator.cpp)

# Filters: the same generator is compiled twice, once as-is and once with the
# Adams2019 autoscheduler.
add_halide_library(mlp FROM mlp.generator)
add_halide_library(mlp_auto_schedule FROM mlp.generator
                   GENERATOR mlp
                   AUTOSCHEDULER Halide::Adams2019)

# Main executable
add_executable(mlp_process process.cpp)
target_link_libraries(mlp_process
                      PRIVATE
                      likwid::likwid
                      OpenMP::OpenMP_CXX
                      Halide::ImageIO
                      mlp
                      mlp_auto_schedule)
Loading