Skip to content

Commit a6d3a3e

Browse files
authored
[LoongArch64] Enable Runtime Async. (issue#124935) (#125114)
* Contributes to #124935 * Fix up hijacking on loongarch64 (preserve async continuation register). * Revert the LA64-related changes from #121672, as `FEATURE_REMAP_FUNCTION` is not enabled on LA64. * The other part, placing async resumption info in a read-write section for LA64, is in #124213
1 parent 3090433 commit a6d3a3e

10 files changed

Lines changed: 58 additions & 88 deletions

File tree

docs/design/coreclr/botr/clr-abi.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ To return `Continuation` we use a volatile/callee-trash register that cannot be u
116116
| arm | r2 |
117117
| arm64 | x2 |
118118
| risc-v | a2 |
119+
| loongarch64 | a2 |
119120

120121
### Passing `Continuation` argument
121122
The `Continuation` parameter is passed at the same position as generic instantiation parameter or immediately after, if both present. For x86 the argument order is reversed.

src/coreclr/jit/codegenloongarch64.cpp

Lines changed: 6 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -557,14 +557,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
557557
{
558558
delta_PSP -= TARGET_POINTER_SIZE;
559559
}
560-
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
561-
{
562-
delta_PSP -= TARGET_POINTER_SIZE;
563-
}
564-
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
565-
{
566-
delta_PSP -= TARGET_POINTER_SIZE;
567-
}
568560

569561
funcletFrameSize = funcletFrameSize - delta_PSP;
570562
funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN);
@@ -3720,14 +3712,6 @@ int CodeGenInterface::genSPtoFPdelta() const
37203712
{
37213713
delta -= TARGET_POINTER_SIZE;
37223714
}
3723-
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
3724-
{
3725-
delta -= TARGET_POINTER_SIZE;
3726-
}
3727-
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
3728-
{
3729-
delta -= TARGET_POINTER_SIZE;
3730-
}
37313715

37323716
assert(delta >= 0);
37333717
return delta;
@@ -6098,41 +6082,15 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
60986082
// Now we can actually use those slot ID's to declare live ranges.
60996083
gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
61006084

6085+
#ifdef FEATURE_REMAP_FUNCTION
61016086
if (m_compiler->opts.compDbgEnC)
61026087
{
6103-
// what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
6104-
// which is:
6105-
// -return address
6106-
// -saved off RBP
6107-
// -saved 'this' pointer and bool for synchronized methods
6108-
6109-
// 4 slots for RBP + return address + RSI + RDI
6110-
int preservedAreaSize = 4 * REGSIZE_BYTES;
6111-
6112-
if (m_compiler->info.compFlags & CORINFO_FLG_SYNCH)
6113-
{
6114-
if (!(m_compiler->info.compFlags & CORINFO_FLG_STATIC))
6115-
{
6116-
preservedAreaSize += REGSIZE_BYTES;
6117-
}
6118-
6119-
preservedAreaSize += 1; // bool for synchronized methods
6120-
}
6121-
6122-
if (m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM)
6123-
{
6124-
preservedAreaSize += TARGET_POINTER_SIZE;
6125-
}
6126-
6127-
if (m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM)
6128-
{
6129-
preservedAreaSize += TARGET_POINTER_SIZE;
6130-
}
6131-
6132-
// Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
6133-
// frame
6134-
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
6088+
// TODO: the lvaMonAcquired, lvaAsyncExecutionContextVar and lvaAsyncSynchronizationContextVar locals are special:
6089+
// they must be allocated at the top of the stack frame and included as part of the EnC frame header
6090+
// for EnC to work.
6091+
NYI_LOONGARCH64("compDbgEnc in CodeGen::genCreateAndStoreGCInfo() ---unimplemented/unused on LA64 yet---");
61356092
}
6093+
#endif // FEATURE_REMAP_FUNCTION
61366094

61376095
if (m_compiler->opts.IsReversePInvoke())
61386096
{
@@ -6774,14 +6732,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
67746732
{
67756733
localFrameSize -= TARGET_POINTER_SIZE;
67766734
}
6777-
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
6778-
{
6779-
localFrameSize -= TARGET_POINTER_SIZE;
6780-
}
6781-
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
6782-
{
6783-
localFrameSize -= TARGET_POINTER_SIZE;
6784-
}
67856735

67866736
#ifdef DEBUG
67876737
if (m_compiler->opts.disAsm)
@@ -6848,14 +6798,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
68486798
{
68496799
localFrameSize -= TARGET_POINTER_SIZE;
68506800
}
6851-
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
6852-
{
6853-
localFrameSize -= TARGET_POINTER_SIZE;
6854-
}
6855-
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
6856-
{
6857-
localFrameSize -= TARGET_POINTER_SIZE;
6858-
}
68596801

68606802
JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; "
68616803
"localloc? %s\n",

src/coreclr/jit/lclvars.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4352,24 +4352,26 @@ void Compiler::lvaFixVirtualFrameOffsets()
43524352

43534353
if ((lvaMonAcquired != BAD_VAR_NUM) && !opts.IsOSR())
43544354
{
4355-
int offset = lvaTable[lvaMonAcquired].GetStackOffset() + delta;
4355+
int offset = lvaTable[lvaMonAcquired].GetStackOffset() + (compCalleeRegsPushed << 3);
43564356
lvaTable[lvaMonAcquired].SetStackOffset(offset);
43574357
delta += lvaLclStackHomeSize(lvaMonAcquired);
43584358
}
43594359

4360+
#ifndef TARGET_LOONGARCH64
43604361
if ((lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !opts.IsOSR())
43614362
{
4362-
int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + delta;
4363+
int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + (compCalleeRegsPushed << 3);
43634364
lvaTable[lvaAsyncExecutionContextVar].SetStackOffset(offset);
43644365
delta += lvaLclStackHomeSize(lvaAsyncExecutionContextVar);
43654366
}
43664367

43674368
if ((lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !opts.IsOSR())
43684369
{
4369-
int offset = lvaTable[lvaAsyncSynchronizationContextVar].GetStackOffset() + delta;
4370+
int offset = lvaTable[lvaAsyncSynchronizationContextVar].GetStackOffset() + (compCalleeRegsPushed << 3);
43704371
lvaTable[lvaAsyncSynchronizationContextVar].SetStackOffset(offset);
43714372
delta += lvaLclStackHomeSize(lvaAsyncSynchronizationContextVar);
43724373
}
4374+
#endif
43734375

43744376
JITDUMP("--- delta bump %d for FP frame\n", delta);
43754377
}

src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
#include <unixasmmacros.inc>
55
#include "AsmOffsets.inc"
66

7-
#define PROBE_FRAME_SIZE 0x90 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
7+
#define PROBE_FRAME_SIZE 0xA0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
88
// 9 * 8 for callee saved registers +
99
// 1 * 8 for caller SP +
10-
// 2 * 8 for int returns +
10+
// 3 * 8 for int returns (a0, a1, a2) +
11+
// 1 * 8 for alignment padding +
1112
// 2 * 8 for FP returns
1213

1314
// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers
@@ -37,13 +38,15 @@
3738

3839
// Slot at $sp+0x68 is reserved for caller sp
3940

40-
// Save the integer return registers
41+
// Save the integer return registers, a2 might contain an objectref (async continuation)
4142
st.d $a0, $sp, 0x70
4243
st.d $a1, $sp, 0x78
44+
st.d $a2, $sp, 0x80
45+
// Slot at $sp+0x88 is alignment padding
4346

4447
// Save the FP return registers
45-
fst.d $f0, $sp, 0x80
46-
fst.d $f1, $sp, 0x88
48+
fst.d $f0, $sp, 0x90
49+
fst.d $f1, $sp, 0x98
4750

4851
// Perform the rest of the PInvokeTransitionFrame initialization.
4952
st.d \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker)
@@ -66,10 +69,11 @@
6669
// Restore the integer return registers
6770
ld.d $a0, $sp, 0x70
6871
ld.d $a1, $sp, 0x78
72+
ld.d $a2, $sp, 0x80
6973

7074
// Restore the FP return registers
71-
fld.d $f0, $sp, 0x80
72-
fld.d $f1, $sp, 0x88
75+
fld.d $f0, $sp, 0x90
76+
fld.d $f1, $sp, 0x98
7377

7478
// Restore callee saved registers
7579
EPILOG_RESTORE_REG_PAIR 23, 24, 0x20
@@ -89,25 +93,26 @@
8993
// All registers correct for return to the original return address.
9094
//
9195
// Register state on exit:
92-
// a2: thread pointer
96+
// a4: thread pointer
97+
// a0, a1, a2: preserved
9398
//
9499
.macro FixupHijackedCallstack
95100

96-
// a2 <- GetThread()
97-
INLINE_GETTHREAD $a2
101+
// a4 <- GetThread()
102+
INLINE_GETTHREAD $a4
98103

99104
//
100105
// Fix the stack by restoring the original return address
101106
//
102107
// Load m_pvHijackedReturnAddress
103-
ld.d $ra, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress
108+
ld.d $ra, $a4, OFFSETOF__Thread__m_pvHijackedReturnAddress
104109

105110
//
106111
// Clear hijack state
107112
//
108113
// Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress
109-
st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
110-
st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8
114+
st.d $zero, $a4, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
115+
st.d $zero, $a4, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8
111116
.endm
112117

113118
//
@@ -122,16 +127,16 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
122127
jirl $r0, $ra, 0
123128

124129
LOCAL_LABEL(WaitForGC):
125-
li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_THREAD_HIJACK)
130+
li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_SAVE_R6 + PTFF_THREAD_HIJACK)
126131
b C_FUNC(RhpWaitForGC)
127132
NESTED_END RhpGcProbeHijack
128133

129134
.global C_FUNC(RhpThrowHwEx)
130135

131136
NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
132-
PUSH_PROBE_FRAME $a2, $a3, $t3
137+
PUSH_PROBE_FRAME $a4, $a3, $t3
133138

134-
ld.d $a0, $a2, OFFSETOF__Thread__m_pDeferredTransitionFrame
139+
ld.d $a0, $a4, OFFSETOF__Thread__m_pDeferredTransitionFrame
135140
bl C_FUNC(RhpWaitForGC2)
136141

137142
POP_PROBE_FRAME

src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ C_FUNC(\Name):
198198
#define PTFF_SAVE_SP 0x00000200
199199
#define PTFF_SAVE_R4 0x00000800
200200
#define PTFF_SAVE_R5 0x00001000
201+
#define PTFF_SAVE_R6 0x00002000
201202
#define PTFF_SAVE_ALL_PRESERVED 0x000001FF // NOTE: r23-r31
202203
#define PTFF_THREAD_HIJACK 0x80000000
203204

src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,14 +449,16 @@ private static unsafe void PutLoongArch64PC12(uint* pCode, long imm)
449449

450450
Debug.Assert((pcInstr & 0xFE000000) == 0x1a000000); // Must be pcalau12i
451451

452+
pcInstr &= 0xFE00001F; // keep bits 31-25, 4-0
452453
// Assemble the pc-relative high 20 bits of 'imm' into the pcalau12i instruction
453454
pcInstr |= (uint)((imm >> 7) & 0x1FFFFE0);
454455

455456
*pCode = pcInstr; // write the assembled instruction
456457

457458
pcInstr = *(pCode + 1);
458459

459-
// Assemble the pc-relative low 12 bits of 'imm' into the addid or ld instruction
460+
pcInstr &= 0xFFC003FF; // keep bits 31-22, 9-0
461+
// Assemble the pc-relative low 12 bits of 'imm' into the addi.d or ld instruction
460462
pcInstr |= (uint)((imm & 0xFFF) << 10);
461463

462464
*(pCode + 1) = pcInstr; // write the assembled instruction
@@ -493,13 +495,15 @@ private static unsafe void PutLoongArch64JIR(uint* pCode, long imm38)
493495
long imm = imm38 + relOff;
494496
relOff = (((imm & 0x1ffff) - relOff) >> 2) & 0xffff;
495497

498+
pcInstr &= 0xFE00001F; // keep bits 31-25, 4-0
496499
// Assemble the pc-relative high 20 bits of 'imm38' into the pcaddu18i instruction
497500
pcInstr |= (uint)(((imm >> 18) & 0xFFFFF) << 5);
498501

499502
*pCode = pcInstr; // write the assembled instruction
500503

501504
pcInstr = *(pCode + 1);
502505

506+
pcInstr &= 0xFC0003FF; // keep bits 31-26, 9-0
503507
// Assemble the pc-relative low 18 bits of 'imm38' into the jirl instruction
504508
pcInstr |= (uint)(relOff << 10);
505509

src/coreclr/utilcode/util.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2185,13 +2185,15 @@ void PutLoongArch64PC12(UINT32 * pCode, INT64 imm)
21852185

21862186
_ASSERTE((pcInstr & 0xFE000000) == 0x1a000000); // Must be pcalau12i
21872187

2188+
pcInstr &= 0xFE00001F; // keep bits 31-25, 4-0
21882189
// Assemble the pc-relative high 20 bits of 'imm' into the pcalau12i instruction
21892190
pcInstr |= (UINT32)((imm >> 7) & 0x1FFFFE0);
21902191

21912192
*pCode = pcInstr; // write the assembled instruction
21922193

21932194
pcInstr = *(pCode + 1);
21942195

2196+
pcInstr &= 0xFFC003FF; // keep bits 31-22, 9-0
21952197
// Assemble the pc-relative low 12 bits of 'imm' into the addi.d or ld instruction
21962198
pcInstr |= (UINT32)((imm & 0xFFF) << 10);
21972199

@@ -2218,13 +2220,15 @@ void PutLoongArch64JIR(UINT32 * pCode, INT64 imm38)
22182220
INT64 imm = imm38 + relOff;
22192221
relOff = (((imm & 0x1ffff) - relOff) >> 2) & 0xffff;
22202222

2223+
pcInstr &= 0xFE00001F; // keep bits 31-25, 4-0
22212224
// Assemble the pc-relative high 20 bits of 'imm38' into the pcaddu18i instruction
22222225
pcInstr |= (UINT32)(((imm >> 18) & 0xFFFFF) << 5);
22232226

22242227
*pCode = pcInstr; // write the assembled instruction
22252228

22262229
pcInstr = *(pCode + 1);
22272230

2231+
pcInstr &= 0xFC0003FF; // keep bits 31-26, 9-0
22282232
// Assemble the pc-relative low 18 bits of 'imm38' into the jirl instruction
22292233
pcInstr |= (UINT32)(relOff << 10);
22302234

src/coreclr/vm/loongarch64/asmhelpers.S

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -637,10 +637,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
637637
// save any integral return value(s)
638638
st.d $a0, $sp, 88
639639
st.d $a1, $sp, 96
640+
// save async continuation return value
641+
st.d $a2, $sp, 104
640642

641643
// save any FP return value(s)
642-
fst.d $f0, $sp, 104
643-
fst.d $f1, $sp, 112
644+
fst.d $f0, $sp, 112
645+
fst.d $f1, $sp, 120
644646

645647
ori $a0, $sp, 0
646648
bl C_FUNC(OnHijackWorker)
@@ -650,10 +652,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
650652
// restore any integral return value(s)
651653
ld.d $a0, $sp, 88
652654
ld.d $a1, $sp, 96
655+
// restore async continuation return value
656+
ld.d $a2, $sp, 104
653657

654658
// restore any FP return value(s)
655-
fld.d $f0, $sp, 104
656-
fld.d $f1, $sp, 112
659+
fld.d $f0, $sp, 112
660+
fld.d $f1, $sp, 120
657661

658662
EPILOG_RESTORE_REG_PAIR 23, 24, 16
659663
EPILOG_RESTORE_REG_PAIR 25, 26, 32

src/coreclr/vm/loongarch64/cgencpu.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,11 @@ struct HijackArgs
429429
size_t ReturnValue[2];
430430
};
431431
union
432+
{
433+
DWORD64 A2;
434+
size_t AsyncRet;
435+
};
436+
union
432437
{
433438
struct {
434439
DWORD64 F0;

src/coreclr/vm/loongarch64/stubs.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,11 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats
474474

475475
pRD->pCurrentContext->A0 = m_Args->A0;
476476
pRD->pCurrentContext->A1 = m_Args->A1;
477+
pRD->pCurrentContext->A2 = m_Args->A2;
477478

478479
pRD->volatileCurrContextPointers.A0 = &m_Args->A0;
479480
pRD->volatileCurrContextPointers.A1 = &m_Args->A1;
481+
pRD->volatileCurrContextPointers.A2 = &m_Args->A2;
480482

481483
pRD->pCurrentContext->S0 = m_Args->S0;
482484
pRD->pCurrentContext->S1 = m_Args->S1;

0 commit comments

Comments
 (0)