Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit. Hold Shift + click to select a range.
4c8682d
PR feedback from SingleAccretion
kg Feb 23, 2026
76e30d0
Address copilot feedback
kg Feb 25, 2026
90f2757
Repair merge damage
kg Feb 25, 2026
8335ca1
jit-format
kg Feb 25, 2026
7029698
loadStructWithRefs compiles now
kg Feb 25, 2026
3c5e30b
When doing a cpobj to the stack just generate a memcpy instead
kg Feb 25, 2026
98c92df
jit-format
kg Feb 25, 2026
18e6520
Address copilot feedback
kg Feb 25, 2026
d7dc84d
Block attempts to mark a struct as multiply used since we can't alloc…
kg Feb 26, 2026
d1a2a40
Checkpoint cpobj rewrite
kg Feb 26, 2026
82211b9
Codegen fixes
kg Feb 26, 2026
46e99ee
Implement isContainableMemoryOp
kg Feb 26, 2026
2cf1f15
Fix emitattr
kg Feb 26, 2026
81b74b7
Fix local stores having their address contained
kg Feb 26, 2026
9b164d7
Assert that storeind's address isn't contained
kg Feb 26, 2026
960e033
Put comment back
kg Feb 26, 2026
38b85f3
jit-format
kg Feb 26, 2026
5a93cce
Centralize isContainableMemoryOp
kg Feb 27, 2026
bd8f88b
Use IsInvariantInRange
kg Feb 27, 2026
036958d
Remove assert
kg Feb 27, 2026
03ae6bb
Apply suggestions from code review
kg Feb 27, 2026
08c5ce6
Address PR feedback
kg Feb 27, 2026
1a46a5d
Mark store_blk as generating null checks
kg Feb 27, 2026
427e33d
Do IsInvariantInRange as an assert instead of the check
kg Feb 27, 2026
5f8983a
Address PR feedback by adding a GetLowering accessor
kg Feb 27, 2026
49ea954
Remove unnecessary ifdef
kg Feb 27, 2026
01b86f0
Remove commented dead code
kg Feb 27, 2026
1eeae1c
Address PR feedback
kg Feb 27, 2026
6d74664
jit-format
kg Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 132 additions & 2 deletions src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
static const int LINEAR_MEMORY_INDEX = 0;

#ifdef TARGET_64BIT
static const instruction INS_I_load = INS_i64_load;
static const instruction INS_I_store = INS_i64_store;
static const instruction INS_I_const = INS_i64_const;
static const instruction INS_I_add = INS_i64_add;
static const instruction INS_I_mul = INS_i64_mul;
Expand All @@ -21,6 +23,8 @@ static const instruction INS_I_le_u = INS_i64_le_u;
static const instruction INS_I_ge_u = INS_i64_ge_u;
static const instruction INS_I_gt_u = INS_i64_gt_u;
#else // !TARGET_64BIT
static const instruction INS_I_load = INS_i32_load;
static const instruction INS_I_store = INS_i32_store;
static const instruction INS_I_const = INS_i32_const;
static const instruction INS_I_add = INS_i32_add;
static const instruction INS_I_mul = INS_i32_mul;
Expand Down Expand Up @@ -427,7 +431,9 @@ void CodeGen::WasmProduceReg(GenTree* node)
//
// If the operand is a candidate, we use that candidate's current register.
// Otherwise it must have been allocated into a temporary register initialized
// in 'WasmProduceReg'.
// in 'WasmProduceReg'. To do this, call treeNode->SetMultiplyUsed() during
// lowering and ensure that regalloc is updated to call 'ConsumeTemporaryRegForOperand'
// on the node(s) that need to be used multiple times.
//
// Arguments:
// operand - The operand node
Expand Down Expand Up @@ -1506,6 +1512,11 @@ void CodeGen::genCodeForBitCast(GenTreeOp* tree)
assert(tree->OperIs(GT_BITCAST));
genConsumeOperands(tree);

if (tree->gtGetOp1()->isContained())
{
NYI_WASM("Contained bitcast operands");
}

var_types toType = tree->TypeGet();
var_types fromType = genActualType(tree->gtGetOp1()->TypeGet());
assert(toType == genActualType(tree));
Expand Down Expand Up @@ -2014,6 +2025,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
GenTree* data = tree->Data();
GenTree* addr = tree->Addr();

assert(!addr->isContained());

// We must consume the operands in the proper execution order,
// so that liveness is updated appropriately.
genConsumeAddress(addr);
Expand Down Expand Up @@ -2418,11 +2431,128 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
default:
unreached();
}

genUpdateLife(blkOp);
}

//------------------------------------------------------------------------
// genCodeForCpObj: Produce code for a GT_STORE_BLK node that represents a cpobj operation.
//
// Arguments:
//    cpObjNode - the node
//
// Notes:
//    Copies the struct slot by slot: non-GC-pointer slots with plain
//    pointer-sized load/store pairs, GC-pointer slots through the
//    CORINFO_HELP_ASSIGN_BYREF write barrier helper. Stack destinations are
//    lowered to a memory.copy instead and must not reach here.
//
void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
    GenTree*  dstAddr     = cpObjNode->Addr();
    GenTree*  source      = cpObjNode->Data();
    var_types srcAddrType = TYP_BYREF;
    regNumber dstReg      = GetMultiUseOperandReg(dstAddr);
    unsigned  dstOffset   = 0;
    regNumber srcReg;
    unsigned  srcOffset;

    // Identify the register containing our source base address, either a multi-use
    // reg representing the operand of a GT_IND, or the frame pointer for LCL_VAR/LCL_FLD.
    if (source->OperIs(GT_IND))
    {
        source = source->gtGetOp1();
        assert(!source->isContained());
        srcAddrType = source->TypeGet();
        srcReg      = GetMultiUseOperandReg(source);
        srcOffset   = 0;

        genEmitNullCheck(srcReg);
    }
    else
    {
        assert(source->OperIs(GT_LCL_FLD, GT_LCL_VAR));
        GenTreeLclVarCommon* lclVar = source->AsLclVarCommon();
        bool                 fpBased;
        srcOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs();
        assert(fpBased);
        srcReg = GetFramePointerReg();
    }

    // If the destination is on the stack we don't need the write barrier.
    bool dstOnStack = cpObjNode->IsAddressNotOnHeap(m_compiler);
    // We should have generated a memory.copy for this scenario in lowering.
    assert(!dstOnStack);

#ifdef DEBUG
    assert(!dstAddr->isContained());

    // This GenTree node has data about GC pointers, this means we're dealing
    // with CpObj.
    assert(cpObjNode->GetLayout()->HasGCPtr());
#endif // DEBUG

    genConsumeOperands(cpObjNode);

    emitter* emit = GetEmitter();

    genEmitNullCheck(dstReg);

    // TODO-WASM: Remove the need to do this somehow
    // The dst and src may be on the evaluation stack, but we can't reliably use them, so drop them.
    emit->emitIns(INS_drop);
    if (!source->isContained())
    {
        emit->emitIns(INS_drop);
    }

    if (cpObjNode->IsVolatile())
    {
        // TODO-WASM: Memory barrier
    }

    ClassLayout* layout = cpObjNode->GetLayout();
    unsigned     slots  = layout->GetSlotCount();

    emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType);
    emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

    unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

    unsigned i = 0;
    while (i < slots)
    {
        // Copy the pointer-sized non-gc-pointer slots one at a time using regular I-sized load/store pairs,
        // and gc-pointer slots using a write barrier.
        if (!layout->IsGCPtr(i))
        {
            // Do a pointer-sized load+store pair at the appropriate offset relative to dest and source
            emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
            emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
            emit->emitIns_I(INS_I_load, EA_PTRSIZE, srcOffset);
            emit->emitIns_I(INS_I_store, EA_PTRSIZE, dstOffset);
        }
        else
        {
            // Load the sp onto the stack for the helper call.
            // TODO-WASM: Implement a special calling convention for this helper that doesn't accept sp/pep.
            emit->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg()));
            // Compute the actual dest/src of the slot being copied to pass to the helper.
            emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
            emit->emitIns_I(INS_I_const, attrDstAddr, dstOffset);
            emit->emitIns(INS_I_add);
            emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
            emit->emitIns_I(INS_I_const, attrSrcAddr, srcOffset);
            emit->emitIns(INS_I_add);
            // TODO-WASM: don't load PEP in genEmitHelperCall for write barriers.
            // TODO-WASM-CQ: add a version of CORINFO_HELP_ASSIGN_BYREF that returns the updated dest/src
            // pointers as a multi-value tuple and use it here.
            genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
            gcPtrCount--;
        }
        ++i;
        srcOffset += TARGET_POINTER_SIZE;
        dstOffset += TARGET_POINTER_SIZE;
    }

    // Every GC pointer slot recorded in the layout must have been copied above.
    assert(gcPtrCount == 0);

    if (cpObjNode->IsVolatile())
    {
        // TODO-WASM: Memory barrier
    }
}

//------------------------------------------------------------------------
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8680,6 +8680,11 @@ class Compiler
const ParameterRegisterLocalMapping* FindParameterRegisterLocalMappingByRegister(regNumber reg);
const ParameterRegisterLocalMapping* FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset);

// GetLowering: accessor for the Compiler's Lowering phase object (m_pLowering),
// letting other phases query lowering without holding their own reference.
Lowering* GetLowering() const
{
return m_pLowering;
}

/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12844,7 +12844,7 @@ void Compiler::gtDispTree(GenTree* tree,

#ifdef TARGET_WASM
case GenTreeBlk::BlkOpKindNativeOpcode:
printf(" (memory.copy|fill)");
printf(" (memory.%s)", tree->OperIsCopyBlkOp() ? "copy" : "fill");
break;
#endif

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,11 @@ struct GenTree
// LIR flags
// These helper methods, along with the flag values they manipulate, are defined in lir.h
//
#ifdef TARGET_WASM
// Asks the register allocator to allocate a dedicated register for this node so we can use its value multiple
// times.
inline void SetMultiplyUsed();
#endif
// UnusedValue indicates that, although this node produces a value, it is unused.
inline void SetUnusedValue();
inline void ClearUnusedValue();
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/lir.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,14 @@ class LIR final
static GenTree* FirstNode(GenTree* node1, GenTree* node2);
};

#ifdef TARGET_WASM
// GenTree::SetMultiplyUsed: Asks the register allocator to allocate a dedicated
// register for this node so its value can be used multiple times (see the
// declaration in gentree.h). Struct-typed nodes are rejected by the assert —
// presumably because a struct value cannot live in a single allocated
// register; TODO(review): confirm against the regalloc handling of this flag.
inline void GenTree::SetMultiplyUsed()
{
assert(gtType != TYP_STRUCT);
gtLIRFlags |= LIR::Flags::MultiplyUsed;
}
#endif

inline void GenTree::SetUnusedValue()
{
gtLIRFlags |= LIR::Flags::UnusedValue;
Expand Down
63 changes: 49 additions & 14 deletions src/coreclr/jit/lowerwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)

if (binOp->gtOverflow())
{
binOp->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
binOp->gtGetOp2()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
binOp->gtGetOp1()->SetMultiplyUsed();
binOp->gtGetOp2()->SetMultiplyUsed();
}

return binOp->gtNext;
Expand All @@ -188,12 +188,12 @@ void Lowering::LowerDivOrMod(GenTreeOp* divMod)
ExceptionSetFlags exSetFlags = divMod->OperExceptions(m_compiler);
if ((exSetFlags & ExceptionSetFlags::ArithmeticException) != ExceptionSetFlags::None)
{
divMod->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
divMod->gtGetOp2()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
divMod->gtGetOp1()->SetMultiplyUsed();
divMod->gtGetOp2()->SetMultiplyUsed();
}
else if ((exSetFlags & ExceptionSetFlags::DivideByZeroException) != ExceptionSetFlags::None)
{
divMod->gtGetOp2()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
divMod->gtGetOp2()->SetMultiplyUsed();
}

ContainCheckDivOrMod(divMod);
Expand Down Expand Up @@ -244,6 +244,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = layout->HasGCPtr();

// If copying to the stack instead of the heap, we should treat it as a raw memcpy for
// smaller generated code and potentially better performance.
if (blkNode->IsAddressNotOnHeap(m_compiler))
doCpObj = false;

// CopyObj or CopyBlk
if (doCpObj)
{
Expand All @@ -254,6 +259,9 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
}

blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
dstAddr->SetMultiplyUsed();
if (src->OperIs(GT_IND))
src->gtGetOp1()->SetMultiplyUsed();
}
else
{
Expand Down Expand Up @@ -290,7 +298,7 @@ void Lowering::LowerCast(GenTree* tree)

if (tree->gtOverflow())
{
tree->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
tree->gtGetOp1()->SetMultiplyUsed();
}
ContainCheckCast(tree->AsCast());
}
Expand Down Expand Up @@ -355,13 +363,6 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)

// TODO-WASM-CQ: contain suitable LEAs here. Take note of the fact that for this to be correct we must prove the
// LEA doesn't overflow. It will involve creating a new frontend node to represent "nuw" (offset) addition.
GenTree* addr = indirNode->Addr();
if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), indirNode->Size()))
{
// These nodes go into an addr mode:
// - GT_LCL_ADDR is a stack addr mode.
MakeSrcContained(indirNode, addr);
}
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -485,6 +486,20 @@ void Lowering::AfterLowerBlock()
}
}

//------------------------------------------------------------------------
// CanMoveNodePast: Checks whether the stackifier may reorder 'node' to a
// position after 'past' without changing observable behavior.
//
// Arguments:
//    node - the node the stackifier wants to move
//    past - the node 'node' would be moved past
//
// Return value:
//    True if 'node' is invariant, is contained, or is a GT_LCL_VAR whose
//    local is not address-exposed.
//
bool CanMoveNodePast(GenTree* node, GenTree* past)
{
bool result = node->IsInvariant() || node->isContained() ||
(node->OperIs(GT_LCL_VAR) &&
!m_lower->m_compiler->lvaGetDesc(node->AsLclVarCommon())->IsAddressExposed());

if (result)
{
// Cross-check the cheap checks above against the general invariance
// analysis; a mismatch indicates the fast path is unsound.
assert(m_lower->IsInvariantInRange(node, past));
}

return result;
}

GenTree* StackifyTree(GenTree* root)
{
ArrayStack<GenTree*>* stack = &m_lower->m_stackificationStack;
Expand Down Expand Up @@ -512,6 +527,26 @@ void Lowering::AfterLowerBlock()
// instead be ifdef-ed out for WASM.
m_anyChanges = true;

// Invariant nodes can be safely moved by the stackifier with no side effects.
// For other nodes, the side effects would require us to turn them into a temporary local, but this
// is not possible for contained nodes like an IND inside a STORE_BLK. However, the few types of
// contained nodes we have in Wasm should be safe to move freely since the lack of 'dup' or
// persistent registers in Wasm means that the actual codegen will trigger the side effect(s) and
// store the result into a Wasm local for any later uses during the containing node's execution,
// i.e. cpobj where the src and dest get stashed at the start and then used as add operands
// repeatedly.
// Locals can also be safely moved as long as they aren't address-exposed due to local var nodes
// being implicitly pseudo-contained.
// TODO-WASM: Verify that it is actually safe to do this for all contained nodes.
if (CanMoveNodePast(node, prev->gtNext))
{
JITDUMP("Stackifier moving node [%06u] after [%06u]\n", Compiler::dspTreeID(node),
Compiler::dspTreeID(prev));
m_lower->BlockRange().Remove(node);
m_lower->BlockRange().InsertAfter(prev, node);
break;
}

JITDUMP("node==[%06u] prev==[%06u]\n", Compiler::dspTreeID(node), Compiler::dspTreeID(prev));
NYI_WASM("IR not in a stackified form");
}
Expand Down Expand Up @@ -549,6 +584,6 @@ void Lowering::AfterLowerArgsForCall(GenTreeCall* call)
{
// Prepare for explicit null check
CallArg* thisArg = call->gtArgs.GetThisArg();
thisArg->GetNode()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
thisArg->GetNode()->SetMultiplyUsed();
}
}
36 changes: 0 additions & 36 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,42 +644,6 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
return newRP;
}

//------------------------------------------------------------------------
// IsContainableMemoryOp: Checks whether this is a memory op that can be contained.
//
// Arguments:
//    node - the node of interest.
//
// Return value:
//    True if this will definitely be a memory reference that could be contained.
//
// Notes:
//    This differs from the isMemoryOp() method on GenTree because it checks for
//    the case of doNotEnregister local. This won't include locals that
//    for some other reason do not become register candidates, nor those that get
//    spilled.
//    Also, because we usually call this before we redo dataflow, any new lclVars
//    introduced after the last dataflow analysis will not yet be marked lvTracked,
//    so we don't use that.
//
bool LinearScan::isContainableMemoryOp(GenTree* node)
{
// Indirections and other direct memory operations are trivially memory refs.
if (node->isMemoryOp())
{
return true;
}
if (node->IsLocal())
{
// With local enregistration disabled entirely, every local lives in memory.
if (!enregisterLocalVars)
{
return true;
}
// Otherwise, only locals explicitly barred from enregistration are
// guaranteed to stay in memory.
const LclVarDsc* varDsc = m_compiler->lvaGetDesc(node->AsLclVar());
return varDsc->lvDoNotEnregister;
}
return false;
}

//------------------------------------------------------------------------
// addKillForRegs: Adds a RefTypeKill ref position for the given registers.
//
Expand Down
Loading
Loading