Skip to content

Commit a065d79

Browse files
committed
[Wasm RyuJit] Enable native wasm fast tail calls
Set FEATURE_FASTTAILCALL=1 and FEATURE_TAILCALL_OPT=1. Fast tail calls are lowered to return_call / return_call_indirect. The SP arg is tagged so that codegen adds compLclFrameSize back to it, undoing the prolog adjustment; the callee therefore receives the caller's incoming shadow-stack pointer.
1 parent ea282a9 commit a065d79

5 files changed

Lines changed: 73 additions & 5 deletions

File tree

src/coreclr/jit/codegenwasm.cpp

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,15 @@ void CodeGen::genFnEpilog(BasicBlock* block)
370370

371371
bool jmpEpilog = block->HasFlag(BBF_HAS_JMP);
372372

373+
// BBF_HAS_JMP on wasm comes only from fast tail calls. The return_call already
374+
// left the function, but the body still needs an INS_end if this is the last block.
373375
if (jmpEpilog)
374376
{
375-
NYI_WASM("genFnEpilog: jmpEpilog");
377+
if (block->IsLast() || m_compiler->bbIsFuncletBeg(block->Next()))
378+
{
379+
instGen(INS_end);
380+
}
381+
return;
376382
}
377383

378384
// TODO-WASM: shadow stack maintenance
@@ -2408,6 +2414,19 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
24082414
{
24092415
assert(genIsValidReg(tree->gtSrcReg));
24102416
GetEmitter()->emitIns_I(INS_local_get, emitActualTypeSize(tree), WasmRegToIndex(tree->gtSrcReg));
2417+
2418+
if ((tree->gtLIRFlags & LIR::Flags::WasmFastTailCallSp) != 0)
2419+
{
2420+
// Fast tail call SP arg: undo the prolog SP adjustment. Fast tail calls are only expected in the root function (not in funclets), as asserted below.
2421+
assert(m_compiler->funCurrentFuncIdx() == ROOT_FUNC_IDX);
2422+
assert(tree->gtSrcReg == GetStackPointerReg(m_compiler->funCurrentFuncIdx()));
2423+
if (m_compiler->compLclFrameSize != 0)
2424+
{
2425+
GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, m_compiler->compLclFrameSize);
2426+
GetEmitter()->emitIns(INS_I_add);
2427+
}
2428+
}
2429+
24112430
WasmProduceReg(tree);
24122431
}
24132432

@@ -2571,7 +2590,33 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
25712590

25722591
ArrayStack<CorInfoWasmType> typeStack(m_compiler->getAllocator(CMK_Codegen));
25732592

2574-
if (call->TypeIs(TYP_STRUCT))
2593+
// For a fast tail call wasm requires the callee's result type to match the enclosing
2594+
// function's, so derive it from the caller's signature (call->gtType is TYP_VOID).
2595+
if (params.isJump)
2596+
{
2597+
if (m_compiler->info.compRetBuffArg != BAD_VAR_NUM)
2598+
{
2599+
// The enclosing method returns its struct via a retbuf arg, so the wasm-level
2600+
// return is empty.
2601+
typeStack.Push(CORINFO_WASM_TYPE_VOID);
2602+
}
2603+
else if (m_compiler->info.compRetType == TYP_VOID)
2604+
{
2605+
typeStack.Push(CORINFO_WASM_TYPE_VOID);
2606+
}
2607+
else if (m_compiler->info.compRetType == TYP_STRUCT)
2608+
{
2609+
typeStack.Push(
2610+
m_compiler->info.compCompHnd->getWasmLowering(m_compiler->info.compMethodInfo->args.retTypeClass));
2611+
}
2612+
else
2613+
{
2614+
// Normalize small ints (bool/byte/short/...).
2615+
typeStack.Push((CorInfoWasmType)emitter::GetWasmValueTypeCode(
2616+
ActualTypeToWasmValueType(m_compiler->info.compRetType)));
2617+
}
2618+
}
2619+
else if (call->TypeIs(TYP_STRUCT))
25752620
{
25762621
typeStack.Push(m_compiler->info.compCompHnd->getWasmLowering(call->gtRetClsHnd));
25772622
}

src/coreclr/jit/lir.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@ class LIR final
4444
#ifdef TARGET_WASM
4545
MultiplyUsed = 0x08, // Set by lowering on nodes that the RA should allocate into
4646
// a dedicated register (WASM local), for multiple uses.
47-
#endif // TARGET_WASM
47+
48+
WasmFastTailCallSp = 0x10, // SP arg of a fast tail call; codegen adds compLclFrameSize
49+
// to undo the prolog's SP adjustment.
50+
#endif // TARGET_WASM
4851
};
4952
};
5053

src/coreclr/jit/morph.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4264,11 +4264,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
42644264
// the fast tail call cannot be performed. This is common to all platforms.
42654265
// Note that the GC'ness of on stack args need not match since the arg setup area is marked
42664266
// as non-interruptible for fast tail calls.
4267+
//
4268+
// Wasm passes args via fresh wasm locals, not the caller's stack, so this check doesn't apply.
4269+
#ifndef TARGET_WASM
42674270
if (calleeArgStackSize > callerArgStackSize)
42684271
{
42694272
reportFastTailCallDecision("Not enough incoming arg space");
42704273
return false;
42714274
}
4275+
#endif // !TARGET_WASM
42724276

42734277
// For Windows some struct parameters are copied on the local frame
42744278
// and then passed by reference. We cannot fast tail call in these situation

src/coreclr/jit/regallocwasm.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,22 @@ void WasmRegAlloc::CollectReferencesForCall(GenTreeCall* callNode)
571571
{
572572
ConsumeTemporaryRegForOperand(thisArg->GetNode() DEBUGARG("call this argument"));
573573
}
574+
575+
// Tag the SP arg of a fast tail call so codegen undoes the prolog SP adjustment.
576+
// The arg has been rewritten to GT_PHYSREG above (args are visited before the call).
577+
if (callNode->IsFastTailCall())
578+
{
579+
CallArg* const spArg = callNode->gtArgs.FindWellKnownArg(WellKnownArg::WasmShadowStackPointer);
580+
if (spArg != nullptr)
581+
{
582+
GenTree* const argNode = spArg->GetNode();
583+
assert(argNode != nullptr);
584+
assert(argNode->OperIs(GT_PHYSREG));
585+
assert(argNode->AsPhysReg()->gtSrcReg == m_perFuncletData[m_currentFunclet]->m_spReg);
586+
587+
argNode->gtLIRFlags |= LIR::Flags::WasmFastTailCallSp;
588+
}
589+
}
574590
}
575591

576592
//------------------------------------------------------------------------

src/coreclr/jit/targetwasm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
#define FEATURE_FIXED_OUT_ARGS 0 // Preallocate the outgoing arg area in the prolog
2424
#define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
2525
#define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers
26-
#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
27-
#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
26+
#define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp. On wasm the "jmp" is the native return_call / return_call_indirect opcode.
27+
#define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
2828
#define FEATURE_IMPLICIT_BYREFS 1 // Support for struct parameters passed via pointers to shadow copies
2929
#define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
3030
#define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register

0 commit comments

Comments
 (0)