Skip to content

Commit 3f0595e

Browse files
committed
-mtune=/-mcpu= support for x86 AMD CPUs
1 parent 49db215 commit 3f0595e

5 files changed

Lines changed: 85 additions & 2 deletions

File tree

python_bindings/src/PyEnums.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,18 @@ void define_enums(py::module &m) {
155155
.value("RVV", Target::Feature::RVV)
156156
.value("ARMv81a", Target::Feature::ARMv81a)
157157
.value("SanitizerCoverage", Target::Feature::SanitizerCoverage)
158+
.value("TuneK8", Target::Feature::TuneK8)
159+
.value("TuneK8_SSE3", Target::Feature::TuneK8_SSE3)
160+
.value("TuneAMDFam10", Target::Feature::TuneAMDFam10)
161+
.value("TuneBtVer1", Target::Feature::TuneBtVer1)
162+
.value("TuneBdVer1", Target::Feature::TuneBdVer1)
163+
.value("TuneBdVer2", Target::Feature::TuneBdVer2)
164+
.value("TuneBdVer3", Target::Feature::TuneBdVer3)
165+
.value("TuneBdVer4", Target::Feature::TuneBdVer4)
166+
.value("TuneBtVer2", Target::Feature::TuneBtVer2)
167+
.value("TuneZnVer1", Target::Feature::TuneZnVer1)
168+
.value("TuneZnVer2", Target::Feature::TuneZnVer2)
169+
.value("TuneZnVer3", Target::Feature::TuneZnVer3)
158170
.value("FeatureEnd", Target::Feature::FeatureEnd);
159171

160172
py::enum_<halide_type_code_t>(m, "TypeCode")

src/CodeGen_X86.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,34 @@ void CodeGen_X86::visit(const Store *op) {
686686
}
687687

688688
string CodeGen_X86::mcpu() const {
689+
// First, check if any explicit request for tuning exists.
690+
if (target.has_feature(Target::TuneK8)) {
691+
return "k8";
692+
} else if (target.has_feature(Target::TuneK8_SSE3)) {
693+
return "k8-sse3";
694+
} else if (target.has_feature(Target::TuneAMDFam10)) {
695+
return "amdfam10";
696+
} else if (target.has_feature(Target::TuneBtVer1)) {
697+
return "btver1";
698+
} else if (target.has_feature(Target::TuneBdVer1)) {
699+
return "bdver1";
700+
} else if (target.has_feature(Target::TuneBdVer2)) {
701+
return "bdver2";
702+
} else if (target.has_feature(Target::TuneBdVer3)) {
703+
return "bdver3";
704+
} else if (target.has_feature(Target::TuneBdVer4)) {
705+
return "bdver4";
706+
} else if (target.has_feature(Target::TuneBtVer2)) {
707+
return "btver2";
708+
} else if (target.has_feature(Target::TuneZnVer1)) {
709+
return "znver1";
710+
} else if (target.has_feature(Target::TuneZnVer2)) {
711+
return "znver2";
712+
} else if (target.has_feature(Target::TuneZnVer3)) {
713+
return "znver3";
714+
}
715+
716+
// And only after that, perform an ad-hoc guess for the tune given features.
689717
if (target.has_feature(Target::AVX512_SapphireRapids)) {
690718
return "sapphirerapids";
691719
} else if (target.has_feature(Target::AVX512_Cannonlake)) {

src/Target.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,18 @@ const std::map<std::string, Target::Feature> feature_name_map = {
385385
{"rvv", Target::RVV},
386386
{"armv81a", Target::ARMv81a},
387387
{"sanitizer_coverage", Target::SanitizerCoverage},
388+
{"tune_k8", Target::TuneK8},
389+
{"tune_k8_sse3", Target::TuneK8_SSE3},
390+
{"tune_amdfam10", Target::TuneAMDFam10},
391+
{"tune_btver1", Target::TuneBtVer1},
392+
{"tune_bdver1", Target::TuneBdVer1},
393+
{"tune_bdver2", Target::TuneBdVer2},
394+
{"tune_bdver3", Target::TuneBdVer3},
395+
{"tune_bdver4", Target::TuneBdVer4},
396+
{"tune_btver2", Target::TuneBtVer2},
397+
{"tune_znver1", Target::TuneZnVer1},
398+
{"tune_znver2", Target::TuneZnVer2},
399+
{"tune_znver3", Target::TuneZnVer3},
388400
// NOTE: When adding features to this map, be sure to update PyEnums.cpp as well.
389401
};
390402

@@ -453,7 +465,7 @@ bool merge_string(Target &t, const std::string &target) {
453465
}
454466
tokens.push_back(rest);
455467

456-
bool os_specified = false, arch_specified = false, bits_specified = false, features_specified = false;
468+
bool os_specified = false, arch_specified = false, bits_specified = false, tune_specified = false, features_specified = false;
457469
bool is_host = false;
458470

459471
for (size_t i = 0; i < tokens.size(); i++) {
@@ -484,6 +496,13 @@ bool merge_string(Target &t, const std::string &target) {
484496
}
485497
os_specified = true;
486498
} else if (lookup_feature(tok, feature)) {
499+
if (tok.substr(0, std::strlen("tune_")) == "tune_") {
500+
if (tune_specified) {
501+
// Only a single tune makes sense.
502+
return false;
503+
}
504+
tune_specified = true;
505+
}
487506
t.set_feature(feature);
488507
features_specified = true;
489508
} else if (tok == "trace_all") {
@@ -979,7 +998,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
979998
// clang-format on
980999

9811000
// clang-format off
982-
const std::array<Feature, 14> intersection_features = {{
1001+
const std::array<Feature, 15> intersection_features = {{
9831002
ARMv7s,
9841003
ARMv81a,
9851004
AVX,

src/Target.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,18 @@ struct Target {
132132
RVV = halide_target_feature_rvv,
133133
ARMv81a = halide_target_feature_armv81a,
134134
SanitizerCoverage = halide_target_feature_sanitizer_coverage,
135+
TuneK8 = halide_target_feature_tune_k8,
136+
TuneK8_SSE3 = halide_target_feature_tune_k8_sse3,
137+
TuneAMDFam10 = halide_target_feature_tune_amdfam10,
138+
TuneBtVer1 = halide_target_feature_tune_btver1,
139+
TuneBdVer1 = halide_target_feature_tune_bdver1,
140+
TuneBdVer2 = halide_target_feature_tune_bdver2,
141+
TuneBdVer3 = halide_target_feature_tune_bdver3,
142+
TuneBdVer4 = halide_target_feature_tune_bdver4,
143+
TuneBtVer2 = halide_target_feature_tune_btver2,
144+
TuneZnVer1 = halide_target_feature_tune_znver1,
145+
TuneZnVer2 = halide_target_feature_tune_znver2,
146+
TuneZnVer3 = halide_target_feature_tune_znver3,
135147
FeatureEnd = halide_target_feature_end
136148
};
137149
Target() = default;

src/runtime/HalideRuntime.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,6 +1347,18 @@ typedef enum halide_target_feature_t {
13471347
halide_target_feature_rvv, ///< Enable RISCV "V" Vector Extension
13481348
halide_target_feature_armv81a, ///< Enable ARMv8.1-a instructions
13491349
halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support.
1350+
halide_target_feature_tune_k8, ///< Tune specifically for the AMD K8 CPU.
1351+
halide_target_feature_tune_k8_sse3, ///< Tune specifically for the AMD K8 w/SSE3 CPU.
1352+
halide_target_feature_tune_amdfam10, ///< Tune specifically for the AMD FAM10 CPU.
1353+
halide_target_feature_tune_btver1, ///< Tune specifically for the AMD BtVer1 CPU.
1354+
halide_target_feature_tune_bdver1, ///< Tune specifically for the AMD BdVer1 CPU.
1355+
halide_target_feature_tune_bdver2, ///< Tune specifically for the AMD BdVer2 CPU.
1356+
halide_target_feature_tune_bdver3, ///< Tune specifically for the AMD BdVer3 CPU.
1357+
halide_target_feature_tune_bdver4, ///< Tune specifically for the AMD BdVer4 CPU.
1358+
halide_target_feature_tune_btver2, ///< Tune specifically for the AMD BtVer2 CPU.
1359+
halide_target_feature_tune_znver1, ///< Tune specifically for the AMD ZnVer1 CPU.
1360+
halide_target_feature_tune_znver2, ///< Tune specifically for the AMD ZnVer2 CPU.
1361+
halide_target_feature_tune_znver3, ///< Tune specifically for the AMD ZnVer3 CPU.
13501362
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
13511363
} halide_target_feature_t;
13521364

0 commit comments

Comments
 (0)