Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4c8682d
PR feedback from SingleAccretion
kg Feb 23, 2026
76e30d0
Address copilot feedback
kg Feb 25, 2026
90f2757
Repair merge damage
kg Feb 25, 2026
8335ca1
jit-format
kg Feb 25, 2026
7029698
loadStructWithRefs compiles now
kg Feb 25, 2026
3c5e30b
When doing a cpobj to the stack just generate a memcpy instead
kg Feb 25, 2026
98c92df
jit-format
kg Feb 25, 2026
18e6520
Address copilot feedback
kg Feb 25, 2026
d7dc84d
Block attempts to mark a struct as multiply used since we can't alloc…
kg Feb 26, 2026
d1a2a40
Checkpoint cpobj rewrite
kg Feb 26, 2026
82211b9
Codegen fixes
kg Feb 26, 2026
46e99ee
Implement isContainableMemoryOp
kg Feb 26, 2026
2cf1f15
Fix emitattr
kg Feb 26, 2026
81b74b7
Fix local stores having their address contained
kg Feb 26, 2026
9b164d7
Assert that storeind's address isn't contained
kg Feb 26, 2026
960e033
Put comment back
kg Feb 26, 2026
38b85f3
jit-format
kg Feb 26, 2026
5a93cce
Centralize isContainableMemoryOp
kg Feb 27, 2026
bd8f88b
Use IsInvariantInRange
kg Feb 27, 2026
036958d
Remove assert
kg Feb 27, 2026
03ae6bb
Apply suggestions from code review
kg Feb 27, 2026
08c5ce6
Address PR feedback
kg Feb 27, 2026
1a46a5d
Mark store_blk as generating null checks
kg Feb 27, 2026
427e33d
Do IsInvariantInRange as an assert instead of the check
kg Feb 27, 2026
5f8983a
Address PR feedback by adding a GetLowering accessor
kg Feb 27, 2026
49ea954
Remove unnecessary ifdef
kg Feb 27, 2026
01b86f0
Remove commented dead code
kg Feb 27, 2026
1eeae1c
Address PR feedback
kg Feb 27, 2026
6d74664
jit-format
kg Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 125 additions & 2 deletions src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
static const int LINEAR_MEMORY_INDEX = 0;

#ifdef TARGET_64BIT
static const instruction INS_I_load = INS_i64_load;
static const instruction INS_I_store = INS_i64_store;
static const instruction INS_I_const = INS_i64_const;
static const instruction INS_I_add = INS_i64_add;
static const instruction INS_I_mul = INS_i64_mul;
Expand All @@ -21,6 +23,8 @@ static const instruction INS_I_le_u = INS_i64_le_u;
static const instruction INS_I_ge_u = INS_i64_ge_u;
static const instruction INS_I_gt_u = INS_i64_gt_u;
#else // !TARGET_64BIT
static const instruction INS_I_load = INS_i32_load;
static const instruction INS_I_store = INS_i32_store;
static const instruction INS_I_const = INS_i32_const;
static const instruction INS_I_add = INS_i32_add;
static const instruction INS_I_mul = INS_i32_mul;
Expand Down Expand Up @@ -427,7 +431,9 @@ void CodeGen::WasmProduceReg(GenTree* node)
//
// If the operand is a candidate, we use that candidate's current register.
// Otherwise it must have been allocated into a temporary register initialized
// in 'WasmProduceReg'. To request such a temporary register, call
// treeNode->SetMultiplyUsed() during lowering or other pre-regalloc phases, and
// ensure that regalloc is updated to call CollectReferences on the node(s) that
// need to be used multiple times.
Comment thread
kg marked this conversation as resolved.
Outdated
//
// Arguments:
// operand - The operand node
Expand Down Expand Up @@ -2014,6 +2020,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
GenTree* data = tree->Data();
GenTree* addr = tree->Addr();

assert(!addr->isContained());

// We must consume the operands in the proper execution order,
// so that liveness is updated appropriately.
genConsumeAddress(addr);
Expand Down Expand Up @@ -2420,9 +2428,124 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
}
}

//------------------------------------------------------------------------
// genCodeForCpObj: Produce code for a GT_STORE_BLK node that represents a cpobj operation.
//
// Arguments:
//    cpObjNode - the node
//
// Notes:
//    The struct is copied one pointer-sized slot at a time. Slots that the layout reports
//    as GC pointers are copied through CORINFO_HELP_ASSIGN_BYREF; all other slots are copied
//    with a plain pointer-sized load/store pair.
//
//    The destination address, and the source address when the source is a GT_IND, are read
//    back through GetMultiUseOperandReg for every slot, so they must have been given
//    multi-use registers before codegen. A GT_LCL_VAR/GT_LCL_FLD source is addressed
//    relative to the frame pointer instead.
//
void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
    GenTree*  dstAddr     = cpObjNode->Addr();
    GenTree*  source      = cpObjNode->Data();
    var_types srcAddrType = TYP_BYREF;
    regNumber dstReg      = GetMultiUseOperandReg(dstAddr);
    unsigned  dstOffset   = 0;
    regNumber srcReg;
    unsigned  srcOffset;

    // Identify the register containing our source base address, either a multi-use
    // reg representing the operand of a GT_IND, or the frame pointer for LCL_VAR/LCL_FLD.
    if (source->OperIs(GT_IND))
    {
        // From here on 'source' refers to the address operand of the indirection.
        source = source->gtGetOp1();
        assert(!source->isContained());
        srcAddrType = source->TypeGet();
        srcReg      = GetMultiUseOperandReg(source);
        srcOffset   = 0;
    }
    else
    {
        noway_assert(source->OperIs(GT_LCL_FLD, GT_LCL_VAR));
        GenTreeLclVarCommon* lclVar = source->AsLclVarCommon();
        bool                 fpBased;
        srcOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs();
        noway_assert(fpBased);
        srcReg = GetFramePointerReg();
    }

    // If the destination is on the stack we don't need the write barrier.
    bool dstOnStack = cpObjNode->IsAddressNotOnHeap(m_compiler);
    // We should have generated a memory.copy for this scenario in lowering.
    assert(!dstOnStack);

#ifdef DEBUG
    assert(!dstAddr->isContained());

    // This GenTree node has data about GC pointers, this means we're dealing
    // with CpObj.
    assert(cpObjNode->GetLayout()->HasGCPtr());
#endif // DEBUG

    genConsumeOperands(cpObjNode);

    emitter* emit = GetEmitter();

    // TODO-WASM: Implicit null check

    // TODO-WASM: Remove the need to do this somehow
    // The dst and src may be on the evaluation stack, but we can't reliably use them, so drop them.
    emit->emitIns(INS_drop);
    if (!source->isContained())
    {
        emit->emitIns(INS_drop);
    }

    if (cpObjNode->IsVolatile())
    {
        // TODO-WASM: Memory barrier
    }

    ClassLayout* layout = cpObjNode->GetLayout();
    unsigned     slots  = layout->GetSlotCount();

    emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType);
    emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

    // Counted down once per barrier call so we can verify below that every GC slot was visited.
    unsigned gcPtrCount = layout->GetGCPtrCount();

    unsigned i = 0;
    while (i < slots)
    {
        // Copy the pointer-sized non-gc-pointer slots one at a time using regular I-sized load/store pairs,
        // and gc-pointer slots using a write barrier.
        if (!layout->IsGCPtr(i))
        {
            // Do a pointer-sized load+store pair at the appropriate offset relative to dest and source
            emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
            emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
            emit->emitIns_I(INS_I_load, EA_PTRSIZE, srcOffset);
            emit->emitIns_I(INS_I_store, EA_PTRSIZE, dstOffset);
        }
        else
        {
            // Load the sp onto the stack for the helper call.
            // TODO-WASM: Implement a special calling convention for this helper that doesn't accept sp/pep.
            emit->emitIns_I(INS_local_get, EA_PTRSIZE, WasmRegToIndex(GetStackPointerReg()));
            // Compute the actual dest/src of the slot being copied to pass to the helper.
            emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
            emit->emitIns_I(INS_I_const, attrDstAddr, dstOffset);
            emit->emitIns(INS_I_add);
            emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
            emit->emitIns_I(INS_I_const, attrSrcAddr, srcOffset);
            emit->emitIns(INS_I_add);
            // TODO-WASM: Load the PEP value for the helper onto the stack here? Right now genEmitHelperCall does it.
            // Call the byref assign helper. On other targets this updates the dst/src regs but here it won't,
            // so we have to do the local.get + pointer-sized const + add dance every time.
            genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
            gcPtrCount--;
        }
        ++i;
        srcOffset += TARGET_POINTER_SIZE;
        dstOffset += TARGET_POINTER_SIZE;
    }

    assert(gcPtrCount == 0);

    if (cpObjNode->IsVolatile())
    {
        // TODO-WASM: Memory barrier
    }

    WasmProduceReg(cpObjNode);
}

//------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2566,6 +2566,10 @@ class Compiler
friend class ReplaceVisitor;
friend class FlowGraphNaturalLoop;

#ifdef TARGET_WASM
friend class WasmRegAlloc; // For m_pLowering
Comment thread
kg marked this conversation as resolved.
Outdated
#endif

#ifdef FEATURE_HW_INTRINSICS
friend struct GenTreeHWIntrinsic;
friend struct HWIntrinsicInfo;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12844,7 +12844,7 @@ void Compiler::gtDispTree(GenTree* tree,

#ifdef TARGET_WASM
case GenTreeBlk::BlkOpKindNativeOpcode:
printf(" (memory.copy|fill)");
printf(" (memory.%s)", tree->OperIsCopyBlkOp() ? "copy" : "fill");
break;
#endif

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,11 @@ struct GenTree
// LIR flags
// These helper methods, along with the flag values they manipulate, are defined in lir.h
//
#ifdef TARGET_WASM
// Asks the register allocator to allocate a dedicated register for this node so we can use its value multiple
// times.
inline void SetMultiplyUsed();
Comment thread
kg marked this conversation as resolved.
#endif
// UnusedValue indicates that, although this node produces a value, it is unused.
inline void SetUnusedValue();
inline void ClearUnusedValue();
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/lir.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,14 @@ class LIR final
static GenTree* FirstNode(GenTree* node1, GenTree* node2);
};

#ifdef TARGET_WASM
// Sets LIR::Flags::MultiplyUsed on this node, which asks the register allocator for a
// dedicated register so the node's value can be used multiple times.
inline void GenTree::SetMultiplyUsed()
{
// TYP_STRUCT nodes must never be marked.
// NOTE(review): presumably because a struct value cannot be given a dedicated register - confirm.
assert(gtType != TYP_STRUCT);
gtLIRFlags |= LIR::Flags::MultiplyUsed;
}
#endif

inline void GenTree::SetUnusedValue()
{
gtLIRFlags |= LIR::Flags::UnusedValue;
Expand Down
85 changes: 71 additions & 14 deletions src/coreclr/jit/lowerwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)

if (binOp->gtOverflow())
{
binOp->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
binOp->gtGetOp2()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
binOp->gtGetOp1()->SetMultiplyUsed();
binOp->gtGetOp2()->SetMultiplyUsed();
}

return binOp->gtNext;
Expand All @@ -188,12 +188,12 @@ void Lowering::LowerDivOrMod(GenTreeOp* divMod)
ExceptionSetFlags exSetFlags = divMod->OperExceptions(m_compiler);
if ((exSetFlags & ExceptionSetFlags::ArithmeticException) != ExceptionSetFlags::None)
{
divMod->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
divMod->gtGetOp2()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
divMod->gtGetOp1()->SetMultiplyUsed();
divMod->gtGetOp2()->SetMultiplyUsed();
}
else if ((exSetFlags & ExceptionSetFlags::DivideByZeroException) != ExceptionSetFlags::None)
{
divMod->gtGetOp2()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
divMod->gtGetOp2()->SetMultiplyUsed();
}

ContainCheckDivOrMod(divMod);
Expand Down Expand Up @@ -244,6 +244,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = layout->HasGCPtr();

// If copying to the stack instead of the heap, we should treat it as a raw memcpy for
// smaller generated code and potentially better performance.
if (blkNode->IsAddressNotOnHeap(m_compiler))
doCpObj = false;
Comment thread
kg marked this conversation as resolved.

// CopyObj or CopyBlk
if (doCpObj)
{
Expand All @@ -254,6 +259,9 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
}
Comment thread
kg marked this conversation as resolved.

blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
dstAddr->SetMultiplyUsed();
if (src->OperIs(GT_IND))
src->gtGetOp1()->SetMultiplyUsed();
Comment thread
kg marked this conversation as resolved.
}
else
{
Expand Down Expand Up @@ -290,7 +298,7 @@ void Lowering::LowerCast(GenTree* tree)

if (tree->gtOverflow())
{
tree->gtGetOp1()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
tree->gtGetOp1()->SetMultiplyUsed();
}
ContainCheckCast(tree->AsCast());
}
Expand Down Expand Up @@ -355,13 +363,6 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode)

// TODO-WASM-CQ: contain suitable LEAs here. Take note of the fact that for this to be correct we must prove the
// LEA doesn't overflow. It will involve creating a new frontend node to represent "nuw" (offset) addition.
GenTree* addr = indirNode->Addr();
if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), indirNode->Size()))
{
// These nodes go into an addr mode:
// - GT_LCL_ADDR is a stack addr mode.
MakeSrcContained(indirNode, addr);
}
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -512,6 +513,62 @@ void Lowering::AfterLowerBlock()
// instead be ifdef-ed out for WASM.
m_anyChanges = true;

// Invariant nodes can be safely moved by the stackifier with no side effects.
// For other nodes, the side effects would require us to turn them into a temporary local, but this
// is not possible for contained nodes like an IND inside a STORE_BLK. However, the few types of
// contained nodes we have in Wasm should be safe to move freely since the lack of 'dup' or
// persistent registers in Wasm means that the actual codegen will trigger the side effect(s) and
// store the result into a Wasm local for any later uses during the containing node's execution,
// i.e. cpobj where the src and dest get stashed at the start and then used as add operands
// repeatedly.
// Locals can also be safely moved as long as they aren't address-exposed due to local var nodes
// being implicitly pseudo-contained.
Comment thread
kg marked this conversation as resolved.
Outdated
// TODO-WASM: Verify that it is actually safe to do this for all contained nodes.
Comment thread
kg marked this conversation as resolved.
if (
m_lower->IsInvariantInRange(node, prev->gtNext) ||
node->isContained() ||
(node->OperIs(GT_LCL_VAR) &&
!m_lower->m_compiler->lvaGetDesc(node->AsLclVarCommon())->IsAddressExposed()))
Comment thread
kg marked this conversation as resolved.
Outdated
{
JITDUMP("Stackifier moving node [%06u] after [%06u]\n", Compiler::dspTreeID(node),
Compiler::dspTreeID(prev));
m_lower->BlockRange().Remove(node);
m_lower->BlockRange().InsertAfter(prev, node);
break;
}

/*
else
{
// To resolve this scenario we have two options:
// 1. We try moving the whole tree rooted at `node`.
// To avoid quadratic behavior, we first stackify it and collect all the side effects
// from it. Then we check for interference of those side effects with nodes between
// 'node' and 'prev'.
// 2. Failing that, we insert a temporary ('ReplaceWithLclVar') for 'node'.
// To avoid explosion of temporaries, we maintain a busy/free set of them.
// For now, for simplicity we are implementing #2 only.

LIR::Use nodeUse;
// FIXME-WASM: TryGetUse is inefficient here, replace it with something more optimal
if (!m_lower->BlockRange().TryGetUse(node, &nodeUse))
{
JITDUMP("node==[%06u] prev==[%06u]\n", Compiler::dspTreeID(node),
Compiler::dspTreeID(prev)); NYI_WASM("Could not get a LIR::Use for the node to be moved by the
stackifier");
Comment thread
kg marked this conversation as resolved.
Outdated
}

unsigned lclNum = nodeUse.ReplaceWithLclVar(m_lower->m_compiler);
GenTree* newNode = nodeUse.Def();
JITDUMP("Stackifier replaced node [%06u] with lcl var %u\n", Compiler::dspTreeID(node), lclNum);
m_lower->BlockRange().Remove(newNode);
m_lower->BlockRange().InsertAfter(prev, newNode);
JITDUMP("Stackifier moved new node [%06u] after [%06u]\n", Compiler::dspTreeID(newNode),
Compiler::dspTreeID(prev)); break;
}
*/
Comment thread
kg marked this conversation as resolved.
Outdated
Comment thread
kg marked this conversation as resolved.
Outdated

JITDUMP("node==[%06u] prev==[%06u]\n", Compiler::dspTreeID(node), Compiler::dspTreeID(prev));
NYI_WASM("IR not in a stackified form");
}
Expand Down Expand Up @@ -549,6 +606,6 @@ void Lowering::AfterLowerArgsForCall(GenTreeCall* call)
{
// Prepare for explicit null check
CallArg* thisArg = call->gtArgs.GetThisArg();
thisArg->GetNode()->gtLIRFlags |= LIR::Flags::MultiplyUsed;
thisArg->GetNode()->SetMultiplyUsed();
}
}
36 changes: 0 additions & 36 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,42 +644,6 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
return newRP;
}

//------------------------------------------------------------------------
// isContainableMemoryOp: Decides whether a node is a memory operand that may be contained.
//
// Arguments:
//    node - the node to classify.
//
// Return value:
//    True when the node will definitely be a memory reference that could be contained.
//
// Notes:
//    Unlike GenTree::isMemoryOp(), this also accepts locals: every local when
//    'enregisterLocalVars' is off, otherwise only locals flagged lvDoNotEnregister.
//    Locals that fail to become register candidates for some other reason, and
//    locals that end up spilled, are not reported.
//    lvTracked is deliberately not consulted: this is usually called before dataflow
//    is redone, so lclVars introduced since the last dataflow analysis are not yet
//    marked lvTracked.
//
bool LinearScan::isContainableMemoryOp(GenTree* node)
{
    // Anything that is already a memory op qualifies outright.
    if (node->isMemoryOp())
    {
        return true;
    }

    // Beyond memory ops, only locals can qualify.
    if (!node->IsLocal())
    {
        return false;
    }

    // With enregistration disabled every local qualifies; the descriptor is only
    // looked up when enregistration is on.
    return !enregisterLocalVars || m_compiler->lvaGetDesc(node->AsLclVar())->lvDoNotEnregister;
}

//------------------------------------------------------------------------
// addKillForRegs: Adds a RefTypeKill ref position for the given registers.
//
Expand Down
Loading
Loading