Skip to content

Commit aecd903

Browse files
committed
Fix fundamental confusion about target/tune CPU
Sooo. Uh, remember when in halide#6655 we've agreed that we want to add support to precisely specify the CPU for which the code should be *tuned* for, but not *targeted* for. Aka, similar to clang's `-mtune=` option, that does not affect the ISA set selection? So guess what, that's not what we did, apparently. `CodeGen_LLVM::mcpu()` / `halide_mcpu` actually do specify the *target* CPU. It was obvious in retrospect, because e.g. `CodeGen_X86::mattrs()` does not, in fact, ever specify `+avx2`, yet we get AVX2 :) So we've unintentionally added `-march=` support. Oops. While i'd like to add `-march=` support, that was not the goal here. Fixing this is complicated by the fact that `llvm::Target::createTargetMachine()` only takes `CPU Target` string, you can't specify `CPU Tune`. But this is actually a blessing in disguise, because it allows us to fix another bug at the same time: There is a problem with halide "compile to llvm ir assembly", a lot of information from Halide Target is not //really// lowered into LLVM Module, but is embedded as a metadata, that is then extracted by halide `make_target_machine()`. While that is not a problem in itself, it makes it *impossible* to dump the LLVM IR, and manually play with it, because e.g. the CPU [Target] and Attributes (ISA set) are not actually lowered into the form LLVM understands, but are in some halide-specific metadata. So, to fix the first bug, we must lower the CPU Tune into per-function `"tune-cpu"` metadata, and while there we might as well lower `"target-cpu"` and `"target-features"` similarly.
1 parent 4ab4ad9 commit aecd903

13 files changed

Lines changed: 123 additions & 72 deletions

src/CodeGen_ARM.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ class CodeGen_ARM : public CodeGen_Posix {
7272
};
7373
vector<Pattern> casts, calls, averagings, negations;
7474

75-
string mcpu() const override;
75+
string mcpu_target() const override;
76+
string mcpu_tune() const override;
7677
string mattrs() const override;
7778
bool use_soft_float_abi() const override;
7879
int native_vector_bits() const override;
@@ -1392,7 +1393,7 @@ Type CodeGen_ARM::upgrade_type_for_storage(const Type &t) const {
13921393
return CodeGen_Posix::upgrade_type_for_storage(t);
13931394
}
13941395

1395-
string CodeGen_ARM::mcpu() const {
1396+
string CodeGen_ARM::mcpu_target() const {
13961397
if (target.bits == 32) {
13971398
if (target.has_feature(Target::ARMv7s)) {
13981399
return "swift";
@@ -1410,6 +1411,10 @@ string CodeGen_ARM::mcpu() const {
14101411
}
14111412
}
14121413

1414+
string CodeGen_ARM::mcpu_tune() const {
1415+
return mcpu_target();
1416+
}
1417+
14131418
string CodeGen_ARM::mattrs() const {
14141419
if (target.bits == 32) {
14151420
if (target.has_feature(Target::ARMv7s)) {

src/CodeGen_Hexagon.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ class CodeGen_Hexagon : public CodeGen_Posix {
4242

4343
void init_module() override;
4444

45-
std::string mcpu() const override;
45+
std::string mcpu_target() const override;
46+
std::string mcpu_tune() const override;
4647
std::string mattrs() const override;
4748
int isa_version;
4849
bool use_soft_float_abi() const override;
@@ -1788,7 +1789,7 @@ Value *CodeGen_Hexagon::call_intrin(llvm::Type *result_type, const string &name,
17881789
fn, std::move(args));
17891790
}
17901791

1791-
string CodeGen_Hexagon::mcpu() const {
1792+
string CodeGen_Hexagon::mcpu_target() const {
17921793
if (target.has_feature(Halide::Target::HVX_v66)) {
17931794
return "hexagonv66";
17941795
} else if (target.has_feature(Halide::Target::HVX_v65)) {
@@ -1798,6 +1799,10 @@ string CodeGen_Hexagon::mcpu() const {
17981799
}
17991800
}
18001801

1802+
string CodeGen_Hexagon::mcpu_tune() const {
1803+
return mcpu_target();
1804+
}
1805+
18011806
string CodeGen_Hexagon::mattrs() const {
18021807
std::stringstream attrs;
18031808
attrs << "+hvx-length128b";

src/CodeGen_Internal.cpp

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -590,11 +590,9 @@ bool get_md_string(llvm::Metadata *value, std::string &result) {
590590
return false;
591591
}
592592

593-
void get_target_options(const llvm::Module &module, llvm::TargetOptions &options, std::string &mcpu, std::string &mattrs) {
593+
void get_target_options(const llvm::Module &module, llvm::TargetOptions &options) {
594594
bool use_soft_float_abi = false;
595595
get_md_bool(module.getModuleFlag("halide_use_soft_float_abi"), use_soft_float_abi);
596-
get_md_string(module.getModuleFlag("halide_mcpu"), mcpu);
597-
get_md_string(module.getModuleFlag("halide_mattrs"), mattrs);
598596
std::string mabi;
599597
get_md_string(module.getModuleFlag("halide_mabi"), mabi);
600598
bool use_pic = true;
@@ -629,9 +627,14 @@ void clone_target_options(const llvm::Module &from, llvm::Module &to) {
629627
to.addModuleFlag(llvm::Module::Warning, "halide_use_soft_float_abi", use_soft_float_abi ? 1 : 0);
630628
}
631629

632-
std::string mcpu;
633-
if (get_md_string(from.getModuleFlag("halide_mcpu"), mcpu)) {
634-
to.addModuleFlag(llvm::Module::Warning, "halide_mcpu", llvm::MDString::get(context, mcpu));
630+
std::string mcpu_target;
631+
if (get_md_string(from.getModuleFlag("halide_mcpu_target"), mcpu_target)) {
632+
to.addModuleFlag(llvm::Module::Warning, "halide_mcpu_target", llvm::MDString::get(context, mcpu_target));
633+
}
634+
635+
std::string mcpu_tune;
636+
if (get_md_string(from.getModuleFlag("halide_mcpu_tune"), mcpu_tune)) {
637+
to.addModuleFlag(llvm::Module::Warning, "halide_mcpu_tune", llvm::MDString::get(context, mcpu_tune));
635638
}
636639

637640
std::string mattrs;
@@ -657,9 +660,7 @@ std::unique_ptr<llvm::TargetMachine> make_target_machine(const llvm::Module &mod
657660
internal_assert(llvm_target) << "Could not create LLVM target for " << triple.str() << "\n";
658661

659662
llvm::TargetOptions options;
660-
std::string mcpu = "";
661-
std::string mattrs = "";
662-
get_target_options(module, options, mcpu, mattrs);
663+
get_target_options(module, options);
663664

664665
bool use_pic = true;
665666
get_md_bool(module.getModuleFlag("halide_use_pic"), use_pic);
@@ -668,18 +669,29 @@ std::unique_ptr<llvm::TargetMachine> make_target_machine(const llvm::Module &mod
668669
get_md_bool(module.getModuleFlag("halide_use_large_code_model"), use_large_code_model);
669670

670671
auto *tm = llvm_target->createTargetMachine(module.getTargetTriple(),
671-
mcpu, mattrs,
672+
/*CPU target=*/"", /*Features=*/"",
672673
options,
673674
use_pic ? llvm::Reloc::PIC_ : llvm::Reloc::Static,
674675
use_large_code_model ? llvm::CodeModel::Large : llvm::CodeModel::Small,
675676
llvm::CodeGenOpt::Aggressive);
676677
return std::unique_ptr<llvm::TargetMachine>(tm);
677678
}
678679

679-
void set_function_attributes_for_target(llvm::Function *fn, const Target &t) {
680+
void set_function_attributes_from_halide_target_options(llvm::Function &fn) {
681+
llvm::Module &module = *fn.getParent();
682+
683+
std::string mcpu_target, mcpu_tune, mattrs;
684+
get_md_string(module.getModuleFlag("halide_mcpu_target"), mcpu_target);
685+
get_md_string(module.getModuleFlag("halide_mcpu_tune"), mcpu_tune);
686+
get_md_string(module.getModuleFlag("halide_mattrs"), mattrs);
687+
688+
fn.addFnAttr("target-cpu", mcpu_target);
689+
fn.addFnAttr("tune-cpu", mcpu_tune);
690+
fn.addFnAttr("target-features", mattrs);
691+
680692
// Turn off approximate reciprocals for division. It's too
681693
// inaccurate even for us.
682-
fn->addFnAttr("reciprocal-estimates", "none");
694+
fn.addFnAttr("reciprocal-estimates", "none");
683695
}
684696

685697
void embed_bitcode(llvm::Module *M, const string &halide_command) {

src/CodeGen_Internal.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,17 @@ Expr lower_signed_shift_right(const Expr &a, const Expr &b);
9292
/** Reduce a mux intrinsic to a select tree */
9393
Expr lower_mux(const Call *mux);
9494

95-
/** Given an llvm::Module, set llvm:TargetOptions, cpu and attr information */
96-
void get_target_options(const llvm::Module &module, llvm::TargetOptions &options, std::string &mcpu, std::string &mattrs);
95+
/** Given an llvm::Module, set llvm:TargetOptions information */
96+
void get_target_options(const llvm::Module &module, llvm::TargetOptions &options);
9797

9898
/** Given two llvm::Modules, clone target options from one to the other */
9999
void clone_target_options(const llvm::Module &from, llvm::Module &to);
100100

101101
/** Given an llvm::Module, get or create an llvm:TargetMachine */
102102
std::unique_ptr<llvm::TargetMachine> make_target_machine(const llvm::Module &module);
103103

104-
/** Set the appropriate llvm Function attributes given a Target. */
105-
void set_function_attributes_for_target(llvm::Function *, const Target &);
104+
/** Set the appropriate llvm Function attributes given the Halide Target. */
105+
void set_function_attributes_from_halide_target_options(llvm::Function &);
106106

107107
/** Save a copy of the llvm IR currently represented by the module as
108108
* data in the __LLVM,__bitcode section. Emulates clang's

src/CodeGen_LLVM.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ void CodeGen_LLVM::init_codegen(const std::string &name, bool any_strict_float)
455455

456456
// Add some target specific info to the module as metadata.
457457
module->addModuleFlag(llvm::Module::Warning, "halide_use_soft_float_abi", use_soft_float_abi() ? 1 : 0);
458-
module->addModuleFlag(llvm::Module::Warning, "halide_mcpu", MDString::get(*context, mcpu()));
458+
module->addModuleFlag(llvm::Module::Warning, "halide_mcpu_target", MDString::get(*context, mcpu_target()));
459+
module->addModuleFlag(llvm::Module::Warning, "halide_mcpu_tune", MDString::get(*context, mcpu_tune()));
459460
module->addModuleFlag(llvm::Module::Warning, "halide_mattrs", MDString::get(*context, mattrs()));
460461
module->addModuleFlag(llvm::Module::Warning, "halide_mabi", MDString::get(*context, mabi()));
461462
module->addModuleFlag(llvm::Module::Warning, "halide_use_pic", use_pic() ? 1 : 0);
@@ -523,7 +524,7 @@ std::unique_ptr<llvm::Module> CodeGen_LLVM::compile(const Module &input) {
523524
}
524525
FunctionType *func_t = FunctionType::get(i32_t, arg_types, false);
525526
function = llvm::Function::Create(func_t, llvm_linkage(f.linkage), names.extern_name, module.get());
526-
set_function_attributes_for_target(function, target);
527+
set_function_attributes_from_halide_target_options(*function);
527528

528529
// Mark the buffer args as no alias and save indication for add_argv_wrapper if needed
529530
std::vector<bool> buffer_args(f.args.size());
@@ -564,6 +565,8 @@ std::unique_ptr<llvm::Module> CodeGen_LLVM::compile(const Module &input) {
564565
}
565566

566567
std::unique_ptr<llvm::Module> CodeGen_LLVM::finish_codegen() {
568+
llvm::for_each(*module, set_function_attributes_from_halide_target_options);
569+
567570
// Verify the module is ok
568571
internal_assert(!verifyModule(*module, &llvm::errs()));
569572
debug(2) << "Done generating llvm bitcode\n";

src/CodeGen_LLVM.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,12 @@ class CodeGen_LLVM : public IRVisitor {
106106
virtual void end_func(const std::vector<LoweredArgument> &args);
107107
// @}
108108

109-
/** What should be passed as -mcpu, -mattrs, and related for
110-
* compilation. The architecture-specific code generator should
111-
* define these. */
109+
/** What should be passed as -mcpu (warning: implies attrs!), -mattrs,
110+
* and related for compilation. The architecture-specific code generator
111+
* should define these. */
112112
// @{
113-
virtual std::string mcpu() const = 0;
113+
virtual std::string mcpu_target() const = 0;
114+
virtual std::string mcpu_tune() const = 0;
114115
virtual std::string mattrs() const = 0;
115116
virtual std::string mabi() const;
116117
virtual bool use_soft_float_abi() const = 0;

src/CodeGen_MIPS.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ class CodeGen_MIPS : public CodeGen_Posix {
1919
protected:
2020
using CodeGen_Posix::visit;
2121

22-
string mcpu() const override;
22+
string mcpu_target() const override;
23+
string mcpu_tune() const override;
2324
string mattrs() const override;
2425
bool use_soft_float_abi() const override;
2526
int native_vector_bits() const override;
@@ -29,14 +30,18 @@ CodeGen_MIPS::CodeGen_MIPS(const Target &t)
2930
: CodeGen_Posix(t) {
3031
}
3132

32-
string CodeGen_MIPS::mcpu() const {
33+
string CodeGen_MIPS::mcpu_target() const {
3334
if (target.bits == 32) {
3435
return "";
3536
} else {
3637
return "";
3738
}
3839
}
3940

41+
string CodeGen_MIPS::mcpu_tune() const {
42+
return mcpu_target();
43+
}
44+
4045
string CodeGen_MIPS::mattrs() const {
4146
if (target.bits == 32) {
4247
return "";

src/CodeGen_PTX_Dev.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ class CodeGen_PTX_Dev : public CodeGen_LLVM, public CodeGen_GPU_Dev {
9191
// @}
9292

9393
std::string march() const;
94-
std::string mcpu() const override;
94+
std::string mcpu_target() const override;
95+
std::string mcpu_tune() const override;
9596
std::string mattrs() const override;
9697
bool use_soft_float_abi() const override;
9798
int native_vector_bits() const override;
@@ -153,7 +154,7 @@ void CodeGen_PTX_Dev::add_kernel(Stmt stmt,
153154
// Make our function
154155
FunctionType *func_t = FunctionType::get(void_t, arg_types, false);
155156
function = llvm::Function::Create(func_t, llvm::Function::ExternalLinkage, name, module.get());
156-
set_function_attributes_for_target(function, target);
157+
set_function_attributes_from_halide_target_options(*function);
157158

158159
// Mark the buffer args as no alias
159160
for (size_t i = 0; i < args.size(); i++) {
@@ -542,7 +543,7 @@ string CodeGen_PTX_Dev::march() const {
542543
return "nvptx64";
543544
}
544545

545-
string CodeGen_PTX_Dev::mcpu() const {
546+
string CodeGen_PTX_Dev::mcpu_target() const {
546547
if (target.has_feature(Target::CUDACapability86)) {
547548
return "sm_86";
548549
} else if (target.has_feature(Target::CUDACapability80)) {
@@ -566,6 +567,10 @@ string CodeGen_PTX_Dev::mcpu() const {
566567
}
567568
}
568569

570+
string CodeGen_PTX_Dev::mcpu_tune() const {
571+
return mcpu_target();
572+
}
573+
569574
string CodeGen_PTX_Dev::mattrs() const {
570575
if (target.has_feature(Target::CUDACapability86)) {
571576
return "+ptx71";
@@ -617,7 +622,7 @@ vector<char> CodeGen_PTX_Dev::compile_to_src() {
617622

618623
std::unique_ptr<TargetMachine>
619624
target_machine(llvm_target->createTargetMachine(triple.str(),
620-
mcpu(), mattrs(), options,
625+
mcpu_target(), mattrs(), options,
621626
llvm::Reloc::PIC_,
622627
llvm::CodeModel::Small,
623628
CodeGenOpt::Aggressive));
@@ -758,7 +763,7 @@ vector<char> CodeGen_PTX_Dev::compile_to_src() {
758763
f.write(buffer.data(), buffer.size());
759764
f.close();
760765

761-
string cmd = "ptxas --gpu-name " + mcpu() + " " + ptx.pathname() + " -o " + sass.pathname();
766+
string cmd = "ptxas --gpu-name " + mcpu_target() + " " + ptx.pathname() + " -o " + sass.pathname();
762767
if (system(cmd.c_str()) == 0) {
763768
cmd = "nvdisasm " + sass.pathname();
764769
int ret = system(cmd.c_str());

src/CodeGen_PowerPC.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ class CodeGen_PowerPC : public CodeGen_Posix {
2222
protected:
2323
void init_module() override;
2424

25-
string mcpu() const override;
25+
string mcpu_target() const override;
26+
string mcpu_tune() const override;
2627
string mattrs() const override;
2728
bool use_soft_float_abi() const override;
2829
int native_vector_bits() const override;
@@ -141,7 +142,7 @@ void CodeGen_PowerPC::visit(const Max *op) {
141142
return CodeGen_Posix::visit(op);
142143
}
143144

144-
string CodeGen_PowerPC::mcpu() const {
145+
string CodeGen_PowerPC::mcpu_target() const {
145146
if (target.bits == 32) {
146147
return "ppc32";
147148
} else {
@@ -155,6 +156,10 @@ string CodeGen_PowerPC::mcpu() const {
155156
}
156157
}
157158

159+
string CodeGen_PowerPC::mcpu_tune() const {
160+
return mcpu_target();
161+
}
162+
158163
string CodeGen_PowerPC::mattrs() const {
159164
string features;
160165
string separator;

src/CodeGen_RISCV.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ class CodeGen_RISCV : public CodeGen_Posix {
1919
protected:
2020
using CodeGen_Posix::visit;
2121

22-
string mcpu() const override;
22+
string mcpu_target() const override;
23+
string mcpu_tune() const override;
2324
string mattrs() const override;
2425
string mabi() const override;
2526
bool use_soft_float_abi() const override;
@@ -30,10 +31,14 @@ CodeGen_RISCV::CodeGen_RISCV(const Target &t)
3031
: CodeGen_Posix(t) {
3132
}
3233

33-
string CodeGen_RISCV::mcpu() const {
34+
string CodeGen_RISCV::mcpu_target() const {
3435
return "";
3536
}
3637

38+
string CodeGen_RISCV::mcpu_tune() const {
39+
return mcpu_target();
40+
}
41+
3742
string CodeGen_RISCV::mattrs() const {
3843
// Note: the default march is "rv[32|64]imafdc",
3944
// which includes standard extensions:

0 commit comments

Comments
 (0)