Skip to content

Commit 4c8682d

Browse files
committed
PR feedback from SingleAccretion
Reorder invariant nodes in simple scenarios in stackifier Jitdump when moving nodes in stackifier When regallocwasm creates a new store node, lower it Apply regallocwasm fix from andy Checkpoint Checkpoint Add comment Speculatively implement the dstOnStack optimization (code that hits it doesn't compile yet)
1 parent f5ba5f6 commit 4c8682d

File tree

6 files changed

+143
-3
lines changed

6 files changed

+143
-3
lines changed

src/coreclr/jit/codegenwasm.cpp

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
static const int LINEAR_MEMORY_INDEX = 0;
1414

1515
#ifdef TARGET_64BIT
16+
static const instruction INS_I_load = INS_i64_load;
17+
static const instruction INS_I_store = INS_i64_store;
1618
static const instruction INS_I_const = INS_i64_const;
1719
static const instruction INS_I_add = INS_i64_add;
1820
static const instruction INS_I_mul = INS_i64_mul;
@@ -21,6 +23,8 @@ static const instruction INS_I_le_u = INS_i64_le_u;
2123
static const instruction INS_I_ge_u = INS_i64_ge_u;
2224
static const instruction INS_I_gt_u = INS_i64_gt_u;
2325
#else // !TARGET_64BIT
26+
static const instruction INS_I_load = INS_i32_load;
27+
static const instruction INS_I_store = INS_i32_store;
2428
static const instruction INS_I_const = INS_i32_const;
2529
static const instruction INS_I_add = INS_i32_add;
2630
static const instruction INS_I_mul = INS_i32_mul;
@@ -427,7 +431,9 @@ void CodeGen::WasmProduceReg(GenTree* node)
427431
//
428432
// If the operand is a candidate, we use that candidate's current register.
429433
// Otherwise it must have been allocated into a temporary register initialized
430-
// in 'WasmProduceReg'.
434+
// in 'WasmProduceReg'. To do this, set the LIR::Flags::MultiplyUsed flag during
435+
// lowering or other pre-regalloc phases, and ensure that regalloc is updated to
436+
// call CollectReferences on the node(s) that need to be used multiple times.
431437
//
432438
// Arguments:
433439
// operand - The operand node
@@ -2420,9 +2426,98 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
24202426
}
24212427
}
24222428

2429+
//------------------------------------------------------------------------
2430+
// genCodeForCpObj: Produce code for a GT_STORE_BLK node that represents a cpobj operation.
2431+
//
2432+
// Arguments:
2433+
// cpObjNode - the node
2434+
//
24232435
void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
24242436
{
2425-
NYI_WASM("genCodeForCpObj");
2437+
GenTree* dstAddr = cpObjNode->Addr();
2438+
GenTree* source = cpObjNode->Data();
2439+
var_types srcAddrType = TYP_BYREF;
2440+
2441+
assert(source->isContained());
2442+
if (source->OperIs(GT_IND))
2443+
{
2444+
source = source->gtGetOp1();
2445+
assert(!source->isContained());
2446+
srcAddrType = source->TypeGet();
2447+
}
2448+
2449+
noway_assert(source->IsLocal());
2450+
noway_assert(dstAddr->IsLocal());
2451+
2452+
// If the destination is on the stack we don't need the write barrier.
2453+
bool dstOnStack = cpObjNode->IsAddressNotOnHeap(m_compiler);
2454+
2455+
#ifdef DEBUG
2456+
assert(!dstAddr->isContained());
2457+
2458+
// This GenTree node has data about GC pointers, this means we're dealing
2459+
// with CpObj.
2460+
assert(cpObjNode->GetLayout()->HasGCPtr());
2461+
#endif // DEBUG
2462+
2463+
genConsumeRegs(cpObjNode);
2464+
2465+
ClassLayout* layout = cpObjNode->GetLayout();
2466+
unsigned slots = layout->GetSlotCount();
2467+
2468+
regNumber srcReg = GetMultiUseOperandReg(source);
2469+
regNumber dstReg = GetMultiUseOperandReg(dstAddr);
2470+
2471+
if (cpObjNode->IsVolatile())
2472+
{
2473+
// TODO-WASM: Memory barrier
2474+
}
2475+
2476+
emitter* emit = GetEmitter();
2477+
2478+
emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType);
2479+
emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());
2480+
2481+
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();
2482+
2483+
unsigned i = 0, offset = 0;
2484+
while (i < slots)
2485+
{
2486+
// Copy the pointer-sized non-gc-pointer slots one at a time (and GC pointer slots if the destination is stack)
2487+
// using regular I-sized load/store pairs.
2488+
if (dstOnStack || !layout->IsGCPtr(i))
2489+
{
2490+
// Do a pointer-sized load+store pair at the appropriate offset relative to dest and source
2491+
emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
2492+
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
2493+
emit->emitIns_I(INS_I_load, EA_PTRSIZE, offset);
2494+
emit->emitIns_I(INS_I_store, EA_PTRSIZE, offset);
2495+
}
2496+
else
2497+
{
2498+
// Compute the actual dest/src of the slot being copied to pass to the helper.
2499+
emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
2500+
emit->emitIns_I(INS_i32_const, attrDstAddr, offset);
2501+
emit->emitIns(INS_i32_add);
2502+
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
2503+
emit->emitIns_I(INS_i32_const, attrSrcAddr, offset);
2504+
emit->emitIns(INS_i32_add);
2505+
// Call the byref assign helper. On other targets this updates the dst/src regs but here it won't,
2506+
// so we have to do the local.get+i32.const+i32.add dance every time.
2507+
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
2508+
gcPtrCount--;
2509+
}
2510+
++i;
2511+
offset += TARGET_POINTER_SIZE;
2512+
}
2513+
assert(gcPtrCount == 0);
2514+
2515+
if (cpObjNode->IsVolatile())
2516+
{
2517+
// TODO-WASM: Memory barrier
2518+
}
2519+
2520+
WasmProduceReg(cpObjNode);
24262521
}
24272522

24282523
//------------------------------------------------------------------------

src/coreclr/jit/compiler.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2566,6 +2566,10 @@ class Compiler
25662566
friend class ReplaceVisitor;
25672567
friend class FlowGraphNaturalLoop;
25682568

2569+
#ifdef TARGET_WASM
2570+
friend class WasmRegAlloc; // For m_pLowering
2571+
#endif
2572+
25692573
#ifdef FEATURE_HW_INTRINSICS
25702574
friend struct GenTreeHWIntrinsic;
25712575
friend struct HWIntrinsicInfo;

src/coreclr/jit/gentree.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12844,7 +12844,7 @@ void Compiler::gtDispTree(GenTree* tree,
1284412844

1284512845
#ifdef TARGET_WASM
1284612846
case GenTreeBlk::BlkOpKindNativeOpcode:
12847-
printf(" (memory.copy|fill)");
12847+
printf(" (memory.%s)", tree->OperIsCopyBlkOp() ? "copy" : "fill");
1284812848
break;
1284912849
#endif
1285012850

src/coreclr/jit/lowerwasm.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
254254
}
255255

256256
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
257+
dstAddr->gtLIRFlags |= LIR::Flags::MultiplyUsed;
258+
if (src->OperIs(GT_IND))
259+
src->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
260+
else
261+
src->gtLIRFlags |= LIR::Flags::MultiplyUsed;
257262
}
258263
else
259264
{
@@ -512,6 +517,14 @@ void Lowering::AfterLowerBlock()
512517
// instead be ifdef-ed out for WASM.
513518
m_anyChanges = true;
514519

520+
if (node->IsInvariant())
521+
{
522+
JITDUMP("Stackifier moving invariant node [%06u] after [%06u]\n", Compiler::dspTreeID(node), Compiler::dspTreeID(prev));
523+
m_lower->BlockRange().Remove(node);
524+
m_lower->BlockRange().InsertAfter(prev, node);
525+
break;
526+
}
527+
515528
JITDUMP("node==[%06u] prev==[%06u]\n", Compiler::dspTreeID(node), Compiler::dspTreeID(prev));
516529
NYI_WASM("IR not in a stackified form");
517530
}

src/coreclr/jit/regallocwasm.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include "regallocwasm.h"
1010

11+
#include "lower.h" // for LowerRange()
12+
1113
RegAllocInterface* GetRegisterAllocator(Compiler* compiler)
1214
{
1315
return new (compiler->getAllocator(CMK_LSRA)) WasmRegAlloc(compiler);
@@ -328,6 +330,9 @@ void WasmRegAlloc::CollectReferencesForNode(GenTree* node)
328330
case GT_SUB:
329331
case GT_MUL:
330332
CollectReferencesForBinop(node->AsOp());
333+
334+
case GT_STORE_BLK:
335+
CollectReferencesForBlockStore(node->AsBlk());
331336
break;
332337

333338
default:
@@ -417,6 +422,21 @@ void WasmRegAlloc::CollectReferencesForBinop(GenTreeOp* binopNode)
417422
ConsumeTemporaryRegForOperand(binopNode->gtGetOp1() DEBUGARG("binop overflow check"));
418423
}
419424

425+
// CollectReferencesForBlockStore: Collect virtual register references for a block store.
426+
//
427+
// Arguments:
428+
// node - The GT_STORE_BLK node
429+
//
430+
void WasmRegAlloc::CollectReferencesForBlockStore(GenTreeBlk* node)
431+
{
432+
GenTree* src = node->Data();
433+
if (src->OperIs(GT_IND))
434+
src = src->gtGetOp1();
435+
436+
ConsumeTemporaryRegForOperand(src DEBUGARG("block store source"));
437+
ConsumeTemporaryRegForOperand(node->Addr() DEBUGARG("block store destination"));
438+
}
439+
420440
//------------------------------------------------------------------------
421441
// CollectReferencesForLclVar: Collect virtual register references for a LCL_VAR.
422442
//
@@ -476,6 +496,9 @@ void WasmRegAlloc::RewriteLocalStackStore(GenTreeLclVarCommon* lclNode)
476496
CurrentRange().InsertAfter(lclNode, store);
477497
CurrentRange().Remove(lclNode);
478498
CurrentRange().InsertBefore(insertionPoint, lclNode);
499+
500+
auto tempRange = LIR::ReadOnlyRange(store, store);
501+
m_compiler->m_pLowering->LowerRange(m_currentBlock, tempRange);
479502
}
480503

481504
//------------------------------------------------------------------------
@@ -539,6 +562,8 @@ void WasmRegAlloc::RequestTemporaryRegisterForMultiplyUsedNode(GenTree* node)
539562
// Note how due to the fact we're processing nodes in stack order,
540563
// we don't need to maintain free/busy sets, only a simple stack.
541564
regNumber reg = AllocateTemporaryRegister(genActualType(node));
565+
// If the node already has a regnum, trying to assign it a second one is no good.
566+
assert(node->GetRegNum() == REG_NA);
542567
node->SetRegNum(reg);
543568
}
544569

@@ -561,6 +586,7 @@ void WasmRegAlloc::ConsumeTemporaryRegForOperand(GenTree* operand DEBUGARG(const
561586
}
562587

563588
regNumber reg = ReleaseTemporaryRegister(genActualType(operand));
589+
// If this assert fails you likely called ConsumeTemporaryRegForOperand on your operands in the wrong order.
564590
assert(reg == operand->GetRegNum());
565591
CollectReference(operand);
566592

@@ -605,6 +631,7 @@ void WasmRegAlloc::ResolveReferences()
605631
{
606632
TemporaryRegStack& temporaryRegs = m_temporaryRegs[static_cast<unsigned>(type)];
607633
TemporaryRegBank& allocatedTemporaryRegs = temporaryRegMap[static_cast<unsigned>(type)];
634+
// If temporaryRegs.Count != 0 that means CollectReferences failed to CollectReference one or more multiply-used nodes.
608635
assert(temporaryRegs.Count == 0);
609636

610637
allocatedTemporaryRegs.Count = temporaryRegs.MaxCount;

src/coreclr/jit/regallocwasm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ class WasmRegAlloc : public RegAllocInterface
125125
void CollectReferencesForCast(GenTreeOp* castNode);
126126
void CollectReferencesForBinop(GenTreeOp* binOpNode);
127127
void CollectReferencesForLclVar(GenTreeLclVar* lclVar);
128+
void CollectReferencesForBlockStore(GenTreeBlk* node);
128129
void RewriteLocalStackStore(GenTreeLclVarCommon* node);
129130
void CollectReference(GenTree* node);
130131
void RequestTemporaryRegisterForMultiplyUsedNode(GenTree* node);

0 commit comments

Comments
 (0)