Skip to content

Commit 56f056e

Browse files
[RyuJit] Remove the single-IG prolog restriction (#126552)
Two parts: 1) Be smarter when comparing out-of-order IG numbers (by simply walking the IG list). 2) Remove jumps that encode the destination as `+/- <instr count>`, since they are only needed to work around the single-IG restriction. Not removing some of the underlying infrastructure for them quite yet since it's used in dumping. Also not taking advantage of this capability yet, except with the jumps.
1 parent 9075967 commit 56f056e

20 files changed

Lines changed: 302 additions & 498 deletions

src/coreclr/jit/codegenarm.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2659,9 +2659,11 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
26592659
}
26602660
else
26612661
{
2662+
BasicBlock* loopHead = genCreateTempLabel();
2663+
genDefineInlineTempLabel(loopHead);
26622664
GetEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
26632665
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
2664-
GetEmitter()->emitIns_J(INS_bhi, NULL, -3);
2666+
GetEmitter()->emitIns_ShortJ(INS_bhi, loopHead);
26652667
uCntBytes %= REGSIZE_BYTES * 2;
26662668
}
26672669

src/coreclr/jit/codegenarm64.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1976,10 +1976,12 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
19761976
GetEmitter()->emitIns_R_R_I_I(INS_bfm, EA_PTRSIZE, addrReg, REG_ZR, 0, 5);
19771977
// addrReg points at the beginning of a cache line.
19781978

1979+
BasicBlock* loopHead = genCreateTempLabel();
1980+
genDefineInlineTempLabel(loopHead);
19791981
GetEmitter()->emitIns_R(INS_dczva, EA_PTRSIZE, addrReg);
19801982
GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, addrReg, addrReg, 64);
19811983
GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, addrReg, endAddrReg);
1982-
GetEmitter()->emitIns_J(INS_blo, NULL, -4);
1984+
GetEmitter()->emitIns_ShortJ(INS_blo, loopHead);
19831985

19841986
addrReg = endAddrReg;
19851987
bytesToWrite = 64;
@@ -1998,12 +2000,14 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
19982000

19992001
instGen_Set_Reg_To_Imm(EA_PTRSIZE, countReg, bytesToWrite - 64);
20002002

2003+
BasicBlock* loopHead = genCreateTempLabel();
2004+
genDefineInlineTempLabel(loopHead);
20012005
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, 32);
20022006
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_16BYTE, zeroSimdReg, zeroSimdReg, addrReg, 64,
20032007
INS_OPTS_PRE_INDEX);
20042008

20052009
GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, countReg, countReg, 64);
2006-
GetEmitter()->emitIns_J(INS_bge, NULL, -4);
2010+
GetEmitter()->emitIns_ShortJ(INS_bge, loopHead);
20072011

20082012
bytesToWrite %= 64;
20092013
}
@@ -3835,7 +3839,7 @@ void CodeGen::genAsyncResumeInfo(GenTreeVal* treeNode)
38353839
//
38363840
void CodeGen::genFtnEntry(GenTree* treeNode)
38373841
{
3838-
GetEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, GetEmitter()->emitPrologIG, treeNode->GetRegNum());
3842+
GetEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, GetEmitter()->emitGetFirstPrologIG(), treeNode->GetRegNum());
38393843
genProduceReg(treeNode);
38403844
}
38413845

@@ -5947,13 +5951,12 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
59475951
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
59485952
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);
59495953

5950-
// There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
5951-
// `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
5952-
5954+
BasicBlock* loopHead = genCreateTempLabel();
5955+
genDefineInlineTempLabel(loopHead);
59535956
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, rOffset);
59545957
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
59555958
GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again
5956-
GetEmitter()->emitIns_J(INS_bls, NULL, -4);
5959+
GetEmitter()->emitIns_ShortJ(INS_bls, loopHead);
59575960

59585961
*pInitRegZeroed = false; // The initReg does not contain zero
59595962

src/coreclr/jit/codegencommon.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5602,7 +5602,7 @@ void CodeGen::genFnProlog()
56025602
// For OSR we may have a zero-length prolog. That's not supported
56035603
// when the method must report a generics context, so add a nop if so.
56045604
//
5605-
if (m_compiler->opts.IsOSR() && (GetEmitter()->emitGetPrologOffsetEstimate() == 0) &&
5605+
if (m_compiler->opts.IsOSR() && (GetEmitter()->emitGetCurrentCodeOffsetFrom(nullptr) == 0) &&
56065606
(m_compiler->lvaReportParamTypeArg() || m_compiler->lvaKeepAliveAndReportThis()))
56075607
{
56085608
JITDUMP("OSR: prolog was zero length and has generic context to report: adding nop to pad prolog.\n");
@@ -6046,6 +6046,7 @@ void CodeGen::genGeneratePrologsAndEpilogs()
60466046

60476047
GetEmitter()->emitStartPrologEpilogGeneration();
60486048

6049+
m_compiler->compCurBB = m_compiler->fgFirstBB; // Set the current BB for label creation.
60496050
gcInfo.gcResetForBB();
60506051
genFnProlog();
60516052

@@ -6073,6 +6074,8 @@ void CodeGen::genGeneratePrologsAndEpilogs()
60736074

60746075
GetEmitter()->emitFinishPrologEpilogGeneration();
60756076

6077+
m_compiler->compCurBB = nullptr;
6078+
60766079
#ifdef DEBUG
60776080
if (verbose)
60786081
{

src/coreclr/jit/codegenloongarch64.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2348,7 +2348,7 @@ void CodeGen::genAsyncResumeInfo(GenTreeVal* treeNode)
23482348
//
23492349
void CodeGen::genFtnEntry(GenTree* treeNode)
23502350
{
2351-
GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, GetEmitter()->emitPrologIG, treeNode->GetRegNum());
2351+
GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, GetEmitter()->emitGetFirstPrologIG(), treeNode->GetRegNum());
23522352
genProduceReg(treeNode);
23532353
}
23542354

src/coreclr/jit/codegenriscv64.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -822,14 +822,15 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
822822
GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rEndAddr, rEndAddr, rAddr);
823823
}
824824

825-
// TODO-RISCV64-RVC: Remove hardcoded branch offset here
825+
BasicBlock* loopHead = genCreateTempLabel();
826+
genDefineInlineTempLabel(loopHead);
826827
GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding);
827828
GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + REGSIZE_BYTES);
828829
GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + 2 * REGSIZE_BYTES);
829830
GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, rAddr, padding + 3 * REGSIZE_BYTES);
830831

831832
GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, rAddr, rAddr, 4 * REGSIZE_BYTES);
832-
GetEmitter()->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, rAddr, rEndAddr, -5 << 2);
833+
GetEmitter()->emitIns_J_cond_la(INS_bltu, loopHead, rAddr, rEndAddr);
833834

834835
uLclBytes -= uLoopBytes;
835836
uAddrCurr = 0;
@@ -2323,7 +2324,7 @@ void CodeGen::genAsyncResumeInfo(GenTreeVal* treeNode)
23232324
//
23242325
void CodeGen::genFtnEntry(GenTree* treeNode)
23252326
{
2326-
GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, GetEmitter()->emitPrologIG, treeNode->GetRegNum());
2327+
GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, GetEmitter()->emitGetFirstPrologIG(), treeNode->GetRegNum());
23272328
genProduceReg(treeNode);
23282329
}
23292330

src/coreclr/jit/codegenxarch.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,7 +1696,7 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock, bool isRemovableJ
16961696
#endif
16971697
#endif // !FEATURE_FIXED_OUT_ARGS
16981698

1699-
GetEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock, 0, isRemovableJmpCandidate);
1699+
GetEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock, /* keepShort*/ false, isRemovableJmpCandidate);
17001700
}
17011701

17021702
//------------------------------------------------------------------------
@@ -4330,7 +4330,7 @@ void CodeGen::genAsyncResumeInfo(GenTreeVal* treeNode)
43304330
//
43314331
void CodeGen::genFtnEntry(GenTree* treeNode)
43324332
{
4333-
GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, GetEmitter()->emitPrologIG, treeNode->GetRegNum());
4333+
GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, GetEmitter()->emitGetFirstPrologIG(), treeNode->GetRegNum());
43344334
genProduceReg(treeNode);
43354335
}
43364336

@@ -11488,7 +11488,10 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
1148811488

1148911489
// Set loop counter
1149011490
emit->emitIns_R_I(INS_mov, EA_PTRSIZE, initReg, -(ssize_t)blkSize);
11491+
1149111492
// Loop start
11493+
BasicBlock* loopHead = genCreateTempLabel();
11494+
genDefineInlineTempLabel(loopHead);
1149211495
emit->emitIns_ARX_R(simdMov, EA_ATTR(XMM_REGSIZE_BYTES), zeroSIMDReg, frameReg, initReg, 1, alignedLclHi);
1149311496
emit->emitIns_ARX_R(simdMov, EA_ATTR(XMM_REGSIZE_BYTES), zeroSIMDReg, frameReg, initReg, 1,
1149411497
alignedLclHi + XMM_REGSIZE_BYTES);
@@ -11497,7 +11500,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
1149711500

1149811501
emit->emitIns_R_I(INS_add, EA_PTRSIZE, initReg, XMM_REGSIZE_BYTES * 3);
1149911502
// Loop until counter is 0
11500-
emit->emitIns_J(INS_jne, nullptr, -5);
11503+
emit->emitIns_ShortJ(INS_jne, loopHead);
1150111504

1150211505
// initReg will be zero at end of the loop
1150311506
*pInitRegZeroed = true;

0 commit comments

Comments
 (0)