Skip to content

Commit d30d2b8

Browse files
committed
fix SROA
1 parent b59b495 commit d30d2b8

1 file changed

Lines changed: 7 additions & 3 deletions

File tree

src/codegen.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2225,9 +2225,13 @@ static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align)
22252225

22262226
static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, unsigned nb, Align align)
22272227
{
2228-
if (nb == 1 && align == Align(1))
2229-
return emit_static_alloca(ctx, ctx.builder.getInt8Ty(), align);
2230-
return emit_static_alloca(ctx, ArrayType::get(ctx.builder.getInt8Ty(), alignTo(nb, align)), align);
2228+
// Stupid hack: SROA takes hints from the alloca's element type, and will happily split this allocation into lots of unaligned pieces
2229+
// if it cannot find something better to do, which is terrible for performance.
2230+
// However, if we emit this with an element size equal to the alignment, it will instead split it into aligned chunks
2231+
// which is great for performance and vectorization.
2232+
if (alignTo(nb, align) / align.value() == 1) // don't bother with making an array of length 1
2233+
return emit_static_alloca(ctx, ctx.builder.getIntNTy(align.value() * 8), align);
2234+
return emit_static_alloca(ctx, ArrayType::get(ctx.builder.getIntNTy(align.value() * 8), alignTo(nb, align) / align.value()), align);
22312235
}
22322236

22332237
static AllocaInst *emit_static_roots(jl_codectx_t &ctx, unsigned nroots)

0 commit comments

Comments
 (0)