From f30defb1bd3af4361d4bfb43f45979a7d8098259 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Sat, 23 May 2026 05:51:18 +0200 Subject: [PATCH 1/9] * call tailMergePreds repeatedly --- src/coreclr/jit/fgopt.cpp | 62 +++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index ffa3d88cba33eb..a151632262699e 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5457,48 +5457,48 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } }; - ArrayStack retOrThrowBlocks(getAllocator(CMK_ArrayStack)); - - // Visit each block + // Tail merge predecessors // for (BasicBlock* const block : Blocks()) { iterateTailMerge(block); - if (block->isEmpty()) - { - continue; - } + } - if (block->KindIs(BBJ_THROW)) - { - retOrThrowBlocks.Push(block); - } - else if (block->KindIs(BBJ_RETURN) && (block != genReturnBB)) + // Deduplicate RETURN blocks + // + do + { + predInfo.Reset(); + for (BasicBlock* const block : Blocks()) { - // Avoid splitting a return away from a possible tail call - // - if (!block->hasSingleStmt()) + if (block->isEmpty()) + { + continue; + } + + if (block->KindIs(BBJ_THROW)) + { + predInfo.Push(PredInfo(block, block->lastStmt())); + } + else if (block->KindIs(BBJ_RETURN) && (block != genReturnBB)) { - Statement* const lastStmt = block->lastStmt(); - Statement* const prevStmt = lastStmt->GetPrevStmt(); - GenTree* const prevTree = prevStmt->GetRootNode(); - if (prevTree->IsCall() && prevTree->AsCall()->CanTailCall()) + // Avoid splitting a return away from a possible tail call + // + if (!block->hasSingleStmt()) { - continue; + Statement* const lastStmt = block->lastStmt(); + Statement* const prevStmt = lastStmt->GetPrevStmt(); + GenTree* const prevTree = prevStmt->GetRootNode(); + if (prevTree->IsCall() && prevTree->AsCall()->CanTailCall()) + { + continue; + } } - } - retOrThrowBlocks.Push(block); + predInfo.Push(PredInfo(block, 
block->lastStmt())); + } } - } - - predInfo.Reset(); - for (BasicBlock* const block : retOrThrowBlocks.BottomUpOrder()) - { - predInfo.Push(PredInfo(block, block->lastStmt())); - } - - tailMergePreds(nullptr); + } while (tailMergePreds(nullptr)); // Work through any retries // From 77b2d48610323eff502085596e77774d9005f420 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Tue, 26 May 2026 00:52:13 +0200 Subject: [PATCH 2/9] * process all sets of matchedCandidates at once in tailMerge instead of reinvoking and re-gathering candidates every time * hack to suppress positive diffs --- src/coreclr/jit/fgopt.cpp | 249 +++++++++++++++++++------------------- 1 file changed, 125 insertions(+), 124 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index a151632262699e..267900ea22600b 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5112,9 +5112,9 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } #endif - struct PredInfo + struct Candidate { - PredInfo(BasicBlock* block, Statement* stmt) + Candidate(BasicBlock* block, Statement* stmt) : m_block(block) , m_stmt(stmt) { @@ -5123,21 +5123,25 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) Statement* m_stmt; }; - ArrayStack predInfo(getAllocator(CMK_ArrayStack)); - ArrayStack matchedPredInfo(getAllocator(CMK_ArrayStack)); - ArrayStack retryBlocks(getAllocator(CMK_ArrayStack)); + // TODO: Remove temporal hack to supress the improvement diffs + bool supressDiffsOnlyFirstSet = false; + + jitstd::vector candidates(getAllocator(CMK_ArrayStack)); + ArrayStack retryBlocks(getAllocator(CMK_ArrayStack)); // Try tail merging a block. // If return value is true, retry. // May also add to retryBlocks. // - auto tailMergePreds = [&](BasicBlock* commSucc) -> bool { + auto tailMerge = [&](BasicBlock* commSucc = nullptr) -> int { + int optimizedCount = 0; + // Are there enough preds to make it interesting?
// - if (predInfo.Height() < 2) + if (candidates.size() < 2) { // Not enough preds to merge - return false; + return optimizedCount; } // If there are large numbers of viable preds, forgo trying to merge. @@ -5146,71 +5150,86 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Note we check this rather than countOfInEdges because we don't care // about dups, just the number of unique pred blocks. // - if (predInfo.Height() > mergeLimit) + if (candidates.size() > mergeLimit) { // Too many preds to consider - return false; + return optimizedCount; } - // Find a matching set of preds. Potentially O(N^2) tree comparisons. - // - int i = 0; - while (i < (predInfo.Height() - 1)) + BitVecTraits traits(static_cast(candidates.size()), this); + BitVec processedCandidates = BitVecOps::MakeEmpty(&traits); + + ArrayStack matchedCandidates(getAllocator(CMK_ArrayStack)); + + for (int i = static_cast(candidates.size()) - 1; i >= 0; i--) { - matchedPredInfo.Reset(); - matchedPredInfo.Emplace(predInfo.TopRef(i)); - Statement* const baseStmt = predInfo.TopRef(i).m_stmt; - BasicBlock* const baseBlock = predInfo.TopRef(i).m_block; + if (supressDiffsOnlyFirstSet && optimizedCount == 1) + { + return optimizedCount; + } + + const Candidate& candidateA = candidates[i]; - for (int j = i + 1; j < predInfo.Height(); j++) + // Find a matching set of candidates. Potentially O(N^2) tree comparisons. + // + matchedCandidates.Reset(); + matchedCandidates.Emplace(candidateA); + for (int j = i - 1; j >= 0; j--) { - BasicBlock* const otherBlock = predInfo.TopRef(j).m_block; + const Candidate& candidateB = candidates[j]; + + if (BitVecOps::IsMember(&traits, processedCandidates, j)) + { + continue; + } // Consider: bypass this for statements that can't cause exceptions. 
// - if (!BasicBlock::sameEHRegion(baseBlock, otherBlock)) + if (!BasicBlock::sameEHRegion(candidateA.m_block, candidateB.m_block)) { continue; } - Statement* const otherStmt = predInfo.TopRef(j).m_stmt; - // Consider: compute and cache hashes to make this faster // - if (GenTree::Compare(baseStmt->GetRootNode(), otherStmt->GetRootNode())) + if (GenTree::Compare(candidateA.m_stmt->GetRootNode(), candidateB.m_stmt->GetRootNode())) { - matchedPredInfo.Emplace(predInfo.TopRef(j)); + BitVecOps::AddElemD(&traits, processedCandidates, j); + matchedCandidates.Emplace(candidateB); } } - if (matchedPredInfo.Height() < 2) + if (matchedCandidates.Height() < 2) { - // This pred didn't match any other. Check other preds for matches. - i++; continue; } + optimizedCount++; + madeChanges = true; + // We can move the identical last statements to commSucc, if it exists, // and all preds have matching last statements, and we're not changing EH behavior. // - bool const hasCommSucc = (commSucc != nullptr); - bool const predsInSameEHRegionAsSucc = hasCommSucc && BasicBlock::sameEHRegion(baseBlock, commSucc); - bool const canMergeAllPreds = hasCommSucc && (matchedPredInfo.Height() == (int)commSucc->countOfInEdges()); + bool const hasCommSucc = (commSucc != nullptr); + bool const predsInSameEHRegionAsSucc = + hasCommSucc && BasicBlock::sameEHRegion(candidateA.m_block, commSucc); + bool const canMergeAllPreds = + hasCommSucc && (matchedCandidates.Height() == (int)commSucc->countOfInEdges()); bool const canMergeIntoSucc = predsInSameEHRegionAsSucc && canMergeAllPreds; if (canMergeIntoSucc) { - JITDUMP("All %d preds of " FMT_BB " end with the same tree, moving\n", matchedPredInfo.Height(), + JITDUMP("All %d preds of " FMT_BB " end with the same tree, moving\n", matchedCandidates.Height(), commSucc->bbNum); - JITDUMPEXEC(gtDispStmt(matchedPredInfo.TopRef(0).m_stmt)); + JITDUMPEXEC(gtDispStmt(matchedCandidates.TopRef(0).m_stmt)); - for (int j = 0; j < matchedPredInfo.Height(); j++) + for (int 
j = 0; j < matchedCandidates.Height(); j++) { - PredInfo& info = matchedPredInfo.TopRef(j); - Statement* const stmt = info.m_stmt; - BasicBlock* const predBlock = info.m_block; + Candidate& candidate = matchedCandidates.TopRef(j); + Statement* const stmt = candidate.m_stmt; + BasicBlock* const block = candidate.m_block; - fgUnlinkStmt(predBlock, stmt); + fgUnlinkStmt(block, stmt); // Add one of the matching stmts to block, and // update its flags. @@ -5218,15 +5237,15 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) if (j == 0) { fgInsertStmtAtBeg(commSucc, stmt); - commSucc->CopyFlags(predBlock, BBF_COPY_PROPAGATE); + commSucc->CopyFlags(block, BBF_COPY_PROPAGATE); } - - madeChanges = true; } // It's worth retrying tail merge on this block. // - return true; + retryBlocks.Push(commSucc); + + continue; } // All or a subset of preds have matching last stmt, we will cross-jump. @@ -5235,40 +5254,40 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // if (predsInSameEHRegionAsSucc) { - JITDUMP("A subset of %d preds of " FMT_BB " end with the same tree\n", matchedPredInfo.Height(), + JITDUMP("A subset of %d preds of " FMT_BB " end with the same tree\n", matchedCandidates.Height(), commSucc->bbNum); } else if (commSucc != nullptr) { JITDUMP("%s %d preds of " FMT_BB " end with the same tree but are in a different EH region\n", - canMergeAllPreds ? "All" : "A subset of", matchedPredInfo.Height(), commSucc->bbNum); + canMergeAllPreds ? 
"All" : "A subset of", matchedCandidates.Height(), commSucc->bbNum); } else { - JITDUMP("A set of %d return blocks end with the same tree\n", matchedPredInfo.Height()); + JITDUMP("A set of %d return blocks end with the same tree\n", matchedCandidates.Height()); } - JITDUMPEXEC(gtDispStmt(matchedPredInfo.TopRef(0).m_stmt)); + JITDUMPEXEC(gtDispStmt(matchedCandidates.TopRef(0).m_stmt)); BasicBlock* crossJumpVictim = nullptr; Statement* crossJumpStmt = nullptr; bool haveNoSplitVictim = false; bool haveFallThroughVictim = false; - for (PredInfo& info : matchedPredInfo.TopDownOrder()) + for (Candidate& candidate : matchedCandidates.TopDownOrder()) { - Statement* const stmt = info.m_stmt; - BasicBlock* const predBlock = info.m_block; + Statement* const stmt = candidate.m_stmt; + BasicBlock* const block = candidate.m_block; // Never pick the init block as the victim as that would // cause us to add a predecessor to it, which is invalid. - if (predBlock == fgFirstBB) + if (block == fgFirstBB) { continue; } - bool const isNoSplit = stmt == predBlock->firstStmt(); - bool const isFallThrough = (predBlock->KindIs(BBJ_ALWAYS) && predBlock->JumpsToNext()); + bool const isNoSplit = stmt == block->firstStmt(); + bool const isFallThrough = (block->KindIs(BBJ_ALWAYS) && block->JumpsToNext()); // Is this block possibly better than what we have? 
// @@ -5296,7 +5315,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) if (useBlock) { - crossJumpVictim = predBlock; + crossJumpVictim = block; crossJumpStmt = stmt; haveNoSplitVictim = isNoSplit; haveFallThroughVictim = isFallThrough; @@ -5329,72 +5348,59 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Do the cross jumping // - for (PredInfo& info : matchedPredInfo.TopDownOrder()) + for (Candidate& candidate : matchedCandidates.TopDownOrder()) { - BasicBlock* const predBlock = info.m_block; - Statement* const stmt = info.m_stmt; + BasicBlock* const block = candidate.m_block; + Statement* const stmt = candidate.m_stmt; - if (predBlock == crossJumpVictim) + if (block == crossJumpVictim) { continue; } // remove the statement - fgUnlinkStmt(predBlock, stmt); + fgUnlinkStmt(block, stmt); // Fix up the flow. // if (commSucc != nullptr) { - assert(predBlock->KindIs(BBJ_ALWAYS)); - fgRedirectEdge(predBlock->TargetEdgeRef(), crossJumpTarget); + assert(block->KindIs(BBJ_ALWAYS)); + fgRedirectEdge(block->TargetEdgeRef(), crossJumpTarget); } else { - FlowEdge* const newEdge = fgAddRefPred(crossJumpTarget, predBlock); - predBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); + FlowEdge* const newEdge = fgAddRefPred(crossJumpTarget, block); + block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); } - // For tail merge we have a common successor of predBlock and + // For tail merge we have a common successor of block and // crossJumpTarget, so the profile update can be done locally. if (crossJumpTarget->hasProfileWeight()) { - crossJumpTarget->increaseBBProfileWeight(predBlock->bbWeight); + crossJumpTarget->increaseBBProfileWeight(block->bbWeight); } } - // We changed things - // - madeChanges = true; - - // We should try tail merging the cross jump target. + // It's worth retrying tail merge on this block. // retryBlocks.Push(crossJumpTarget); - - // Continue trying to merge in the current block. 
- // This is a bit inefficient, we could remember how - // far we got through the pred list perhaps. - // - return true; } - // We've looked at everything. - // - return false; + return optimizedCount; }; - auto tailMerge = [&](BasicBlock* block) -> bool { + auto tailMergePreds = [&](BasicBlock* block) -> void { if (block->countOfInEdges() < 2) { // Nothing to merge here - return false; + return; } - predInfo.Reset(); - // Find the subset of preds that reach along non-critical edges - // and populate predInfo. + // and populate candidates. // + candidates.clear(); for (BasicBlock* const predBlock : block->PredBlocks()) { if (predBlock->GetUniqueSucc() != block) @@ -5437,23 +5443,13 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // We don't expect to see PHIs but watch for them anyways. // assert(!lastStmt->IsPhiDefnStmt()); - predInfo.Emplace(predBlock, lastStmt); - } - - return tailMergePreds(block); - }; - - auto iterateTailMerge = [&](BasicBlock* block) -> void { - int numOpts = 0; - - while (tailMerge(block)) - { - numOpts++; + candidates.push_back(Candidate{predBlock, lastStmt}); } + int numOpts = tailMerge(block); if (numOpts > 0) { - JITDUMP("Did %d tail merges in " FMT_BB "\n", numOpts, block->bbNum); + JITDUMP("Merged %d tails going into " FMT_BB "\n", numOpts, block->bbNum); } }; @@ -5461,50 +5457,55 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // for (BasicBlock* const block : Blocks()) { - iterateTailMerge(block); + tailMergePreds(block); } - // Deduplicate RETURN blocks + // Deduplicate RETURN/THROW blocks // - do + candidates.clear(); + for (BasicBlock* const block : Blocks()) { - predInfo.Reset(); - for (BasicBlock* const block : Blocks()) + if (block->isEmpty()) { - if (block->isEmpty()) - { - continue; - } + continue; + } - if (block->KindIs(BBJ_THROW)) - { - predInfo.Push(PredInfo(block, block->lastStmt())); - } - else if (block->KindIs(BBJ_RETURN) && (block != genReturnBB)) + if (block->KindIs(BBJ_THROW)) + { + 
candidates.push_back(Candidate{block, block->lastStmt()}); + } + else if (block->KindIs(BBJ_RETURN) && (block != genReturnBB)) + { + // Avoid splitting a return away from a possible tail call + // + if (!block->hasSingleStmt()) { - // Avoid splitting a return away from a possible tail call - // - if (!block->hasSingleStmt()) + Statement* const lastStmt = block->lastStmt(); + Statement* const prevStmt = lastStmt->GetPrevStmt(); + GenTree* const prevTree = prevStmt->GetRootNode(); + if (prevTree->IsCall() && prevTree->AsCall()->CanTailCall()) { - Statement* const lastStmt = block->lastStmt(); - Statement* const prevStmt = lastStmt->GetPrevStmt(); - GenTree* const prevTree = prevStmt->GetRootNode(); - if (prevTree->IsCall() && prevTree->AsCall()->CanTailCall()) - { - continue; - } + continue; } - - predInfo.Push(PredInfo(block, block->lastStmt())); } + + candidates.push_back(Candidate{block, block->lastStmt()}); } - } while (tailMergePreds(nullptr)); + } + + supressDiffsOnlyFirstSet = true; + int numOpts = tailMerge(nullptr); + if (numOpts > 0) + { + JITDUMP("Deduplicated %d RETURN/THROW blocks", numOpts); + } + supressDiffsOnlyFirstSet = false; // Work through any retries // while (retryBlocks.Height() > 0) { - iterateTailMerge(retryBlocks.Pop()); + tailMergePreds(retryBlocks.Pop()); } // Visit each block and try to merge first statements of successors. 
From a679d912d531e27a14a5267838e0a4c31e53c20d Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Wed, 27 May 2026 06:07:04 +0200 Subject: [PATCH 3/9] * partition matches to be contiguous in memory and skip them instead of using a BitVec to sparsely mark them as processed * move de-duplication before tail-merging and then no longer add them to the retry list as it isn't needed * use stl iterator tag to be able to call std::stable_partition * add assert to vector indexer --- src/coreclr/jit/fgopt.cpp | 178 +++++++++++++++----------------- src/coreclr/jit/jitstd/vector.h | 4 +- 2 files changed, 84 insertions(+), 98 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 267900ea22600b..d44624b02d05ba 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5123,9 +5123,6 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) Statement* m_stmt; }; - // TODO: Remove temporal hack to supress the improvement diffs - bool supressDiffsOnlyFirstSet = false; - jitstd::vector candidates(getAllocator(CMK_ArrayStack)); ArrayStack retryBlocks(getAllocator(CMK_ArrayStack)); @@ -5156,50 +5153,36 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) return optimizedCount; } - BitVecTraits traits(static_cast(candidates.size()), this); - BitVec processedCandidates = BitVecOps::MakeEmpty(&traits); - - ArrayStack matchedCandidates(getAllocator(CMK_ArrayStack)); - - for (int i = static_cast(candidates.size()) - 1; i >= 0; i--) + int matchesStart = 0; + int matchesEnd = 0; + while (matchesEnd < (candidates.size() - 1)) { - if (supressDiffsOnlyFirstSet && optimizedCount == 1) - { - return optimizedCount; - } + // matchesEnd from previous iteration becomes new matchesStart. + // + matchesStart = matchesEnd; - const Candidate& candidateA = candidates[i]; + Candidate candidateA = candidates[matchesStart]; - // Find a matching set of candidates. Potentially O(N^2) tree comparisons.
+ // Find all matching candidates and partition them to be continous in memory. + // The Resulting set is in [matchesStart, matchesEnd) // - matchedCandidates.Reset(); - matchedCandidates.Emplace(candidateA); - for (int j = i - 1; j >= 0; j--) { - const Candidate& candidateB = candidates[j]; - - if (BitVecOps::IsMember(&traits, processedCandidates, j)) - { - continue; - } - - // Consider: bypass this for statements that can't cause exceptions. - // - if (!BasicBlock::sameEHRegion(candidateA.m_block, candidateB.m_block)) - { - continue; - } + auto end = std::stable_partition(candidates.begin() + matchesStart + 1, candidates.end(), + [candidateA](Candidate candidateB) { + // Consider: bypass this for statements that can't cause exceptions. + // + if (!BasicBlock::sameEHRegion(candidateA.m_block, candidateB.m_block)) + { + return false; + } - // Consider: compute and cache hashes to make this faster - // - if (GenTree::Compare(candidateA.m_stmt->GetRootNode(), candidateB.m_stmt->GetRootNode())) - { - BitVecOps::AddElemD(&traits, processedCandidates, j); - matchedCandidates.Emplace(candidateB); - } + return GenTree::Compare(candidateA.m_stmt->GetRootNode(), candidateB.m_stmt->GetRootNode()); + }); + matchesEnd = static_cast(std::distance(candidates.begin(), end)); } - if (matchedCandidates.Height() < 2) + int matchesCount = matchesEnd - matchesStart; + if (matchesCount < 2) { continue; } @@ -5213,34 +5196,46 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) bool const hasCommSucc = (commSucc != nullptr); bool const predsInSameEHRegionAsSucc = hasCommSucc && BasicBlock::sameEHRegion(candidateA.m_block, commSucc); - bool const canMergeAllPreds = - hasCommSucc && (matchedCandidates.Height() == (int)commSucc->countOfInEdges()); + bool const canMergeAllPreds = hasCommSucc && (matchesCount == (int)commSucc->countOfInEdges()); bool const canMergeIntoSucc = predsInSameEHRegionAsSucc && canMergeAllPreds; - if (canMergeIntoSucc) { - JITDUMP("All %d preds of " FMT_BB " end 
with the same tree, moving\n", matchedCandidates.Height(), - commSucc->bbNum); - JITDUMPEXEC(gtDispStmt(matchedCandidates.TopRef(0).m_stmt)); + JITDUMP("All %d preds of " FMT_BB " end with the same tree, moving\n", matchesCount, commSucc->bbNum); + } + else if (predsInSameEHRegionAsSucc) + { + JITDUMP("A subset of %d preds of " FMT_BB " end with the same tree\n", matchesCount, commSucc->bbNum); + } + else if (hasCommSucc) + { + JITDUMP("%s %d preds of " FMT_BB " end with the same tree but are in a different EH region\n", + canMergeAllPreds ? "All" : "A subset of", matchesCount, commSucc->bbNum); + } + else + { + JITDUMP("A set of %d return/throw blocks end with the same tree\n", matchesCount); + } + JITDUMPEXEC(gtDispStmt(candidates[matchesStart].m_stmt)); - for (int j = 0; j < matchedCandidates.Height(); j++) + if (canMergeIntoSucc) + { + // Remove the statement from the preds + // + for (int i = matchesStart; i < matchesEnd; i++) { - Candidate& candidate = matchedCandidates.TopRef(j); - Statement* const stmt = candidate.m_stmt; + Candidate& candidate = candidates[i]; BasicBlock* const block = candidate.m_block; + Statement* const stmt = candidate.m_stmt; fgUnlinkStmt(block, stmt); - - // Add one of the matching stmts to block, and - // update its flags. - // - if (j == 0) - { - fgInsertStmtAtBeg(commSucc, stmt); - commSucc->CopyFlags(block, BBF_COPY_PROPAGATE); - } } + // Add one of the matching stmts to block, and + // update its flags. + // + fgInsertStmtAtBeg(commSucc, candidates[matchesEnd - 1].m_stmt); + commSucc->CopyFlags(candidates[matchesEnd - 1].m_block, BBF_COPY_PROPAGATE); + // It's worth retrying tail merge on this block. // retryBlocks.Push(commSucc); @@ -5252,32 +5247,17 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Pick one pred block as the victim -- preferably a block with just one // statement or one that falls through to block (or both). 
// - if (predsInSameEHRegionAsSucc) - { - JITDUMP("A subset of %d preds of " FMT_BB " end with the same tree\n", matchedCandidates.Height(), - commSucc->bbNum); - } - else if (commSucc != nullptr) - { - JITDUMP("%s %d preds of " FMT_BB " end with the same tree but are in a different EH region\n", - canMergeAllPreds ? "All" : "A subset of", matchedCandidates.Height(), commSucc->bbNum); - } - else - { - JITDUMP("A set of %d return blocks end with the same tree\n", matchedCandidates.Height()); - } - - JITDUMPEXEC(gtDispStmt(matchedCandidates.TopRef(0).m_stmt)); - BasicBlock* crossJumpVictim = nullptr; Statement* crossJumpStmt = nullptr; bool haveNoSplitVictim = false; bool haveFallThroughVictim = false; - for (Candidate& candidate : matchedCandidates.TopDownOrder()) + // todo: investigate why order matters + for (int i = matchesEnd - 1; i >= matchesStart; i--) { - Statement* const stmt = candidate.m_stmt; - BasicBlock* const block = candidate.m_block; + Candidate& candidate = candidates[i]; + BasicBlock* const block = candidate.m_block; + Statement* const stmt = candidate.m_stmt; // Never pick the init block as the victim as that would // cause us to add a predecessor to it, which is invalid. @@ -5348,22 +5328,22 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Do the cross jumping // - for (Candidate& candidate : matchedCandidates.TopDownOrder()) + for (int i = matchesStart; i < matchesEnd; i++) { - BasicBlock* const block = candidate.m_block; - Statement* const stmt = candidate.m_stmt; + Candidate& candidate = candidates[i]; + BasicBlock* const block = candidate.m_block; + Statement* const stmt = candidate.m_stmt; if (block == crossJumpVictim) { continue; } - // remove the statement fgUnlinkStmt(block, stmt); // Fix up the flow. 
// - if (commSucc != nullptr) + if (hasCommSucc) { assert(block->KindIs(BBJ_ALWAYS)); fgRedirectEdge(block->TargetEdgeRef(), crossJumpTarget); @@ -5384,7 +5364,10 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // It's worth retrying tail merge on this block. // - retryBlocks.Push(crossJumpTarget); + if (hasCommSucc) + { + retryBlocks.Push(crossJumpTarget); + } } return optimizedCount; @@ -5446,21 +5429,18 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) candidates.push_back(Candidate{predBlock, lastStmt}); } + // todo: investigate why order matters and remove + std::reverse(candidates.begin(), candidates.end()); + int numOpts = tailMerge(block); if (numOpts > 0) { - JITDUMP("Merged %d tails going into " FMT_BB "\n", numOpts, block->bbNum); + JITDUMP("Merged %d set of tails going into " FMT_BB "\n", numOpts, block->bbNum); } }; - // Tail merge predecessors - // - for (BasicBlock* const block : Blocks()) - { - tailMergePreds(block); - } - - // Deduplicate RETURN/THROW blocks + // Deduplicate RETURN/THROW blocks. + // This can enable tail-merging so do it first. 
// candidates.clear(); for (BasicBlock* const block : Blocks()) @@ -5492,14 +5472,18 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) candidates.push_back(Candidate{block, block->lastStmt()}); } } - - supressDiffsOnlyFirstSet = true; - int numOpts = tailMerge(nullptr); + int numOpts = tailMerge(nullptr); if (numOpts > 0) { - JITDUMP("Deduplicated %d RETURN/THROW blocks", numOpts); + JITDUMP("Deduplicated %d sets of RETURN/THROW blocks\n", numOpts); + } + + // Tail merge predecessors + // + for (BasicBlock* const block : Blocks()) + { + tailMergePreds(block); } - supressDiffsOnlyFirstSet = false; // Work through any retries // diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index 2d0a91210ecc95..e6c2423a6f1169 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -33,7 +33,7 @@ class vector typedef T value_type; // nested classes - class iterator : public jitstd::iterator + class iterator : public jitstd::iterator { iterator(T* ptr); public: @@ -613,6 +613,7 @@ vector& vector::operator=(vector&& vec template typename vector::reference vector::operator[](size_type n) { + assert(n >= 0 && n < m_nSize); return m_pArray[n]; } @@ -620,6 +621,7 @@ template typename vector::const_reference vector::operator[](size_type n) const { + assert(n >= 0 && n < m_nSize); return m_pArray[n]; } From c4e82528413295086fc6198edf3bedf1cf33b254 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Thu, 28 May 2026 03:27:22 +0200 Subject: [PATCH 4/9] * switch to partition over stable_partition, this has some small diffs in downstream phases because the way we choose the crossJumpVictim is order-dependent and non optimal (for example we'd want to avoid new BBF_NEEDS_GCPOLL) * also remove the std::reverse - same reason --- src/coreclr/jit/fgopt.cpp | 12 ++++-------- src/coreclr/jit/jitstd/vector.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 
d44624b02d05ba..341fc0c3606398 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5167,8 +5167,8 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // The Resulting set is in [matchesStart, matchesEnd) // { - auto end = std::stable_partition(candidates.begin() + matchesStart + 1, candidates.end(), - [candidateA](Candidate candidateB) { + auto end = std::partition(candidates.begin() + matchesStart + 1, candidates.end(), + [candidateA](Candidate candidateB) { // Consider: bypass this for statements that can't cause exceptions. // if (!BasicBlock::sameEHRegion(candidateA.m_block, candidateB.m_block)) @@ -5233,8 +5233,8 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Add one of the matching stmts to block, and // update its flags. // - fgInsertStmtAtBeg(commSucc, candidates[matchesEnd - 1].m_stmt); - commSucc->CopyFlags(candidates[matchesEnd - 1].m_block, BBF_COPY_PROPAGATE); + fgInsertStmtAtBeg(commSucc, candidates[matchesStart].m_stmt); + commSucc->CopyFlags(candidates[matchesStart].m_block, BBF_COPY_PROPAGATE); // It's worth retrying tail merge on this block. 
// @@ -5252,7 +5252,6 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) bool haveNoSplitVictim = false; bool haveFallThroughVictim = false; - // todo: investigate why order matters for (int i = matchesEnd - 1; i >= matchesStart; i--) { Candidate& candidate = candidates[i]; @@ -5429,9 +5428,6 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) candidates.push_back(Candidate{predBlock, lastStmt}); } - // todo: investigate why order matters and remove - std::reverse(candidates.begin(), candidates.end()); - int numOpts = tailMerge(block); if (numOpts > 0) { diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index e6c2423a6f1169..17862714046504 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -33,7 +33,7 @@ class vector typedef T value_type; // nested classes - class iterator : public jitstd::iterator + class iterator : public jitstd::iterator { iterator(T* ptr); public: From 86b6a78dc2725b1b394fe5b18bcd7fb039e32057 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Thu, 28 May 2026 03:37:01 +0200 Subject: [PATCH 5/9] * only attempt retries when tail-merging and do it immediately (zero-diff) --- src/coreclr/jit/fgopt.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 341fc0c3606398..668695e9d6cea0 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5254,7 +5254,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) for (int i = matchesEnd - 1; i >= matchesStart; i--) { - Candidate& candidate = candidates[i]; + Candidate candidate = candidates[i]; BasicBlock* const block = candidate.m_block; Statement* const stmt = candidate.m_stmt; @@ -5329,7 +5329,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // for (int i = matchesStart; i < matchesEnd; i++) { - Candidate& candidate = candidates[i]; + Candidate candidate = candidates[i]; BasicBlock* const block = candidate.m_block; 
Statement* const stmt = candidate.m_stmt; @@ -5471,7 +5471,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) int numOpts = tailMerge(nullptr); if (numOpts > 0) { - JITDUMP("Deduplicated %d sets of RETURN/THROW blocks\n", numOpts); + JITDUMP("Deduplicated %d sets of return/throw blocks\n", numOpts); } // Tail merge predecessors @@ -5479,13 +5479,13 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) for (BasicBlock* const block : Blocks()) { tailMergePreds(block); - } - // Work through any retries - // - while (retryBlocks.Height() > 0) - { - tailMergePreds(retryBlocks.Pop()); + // Work through any retries + // + while (retryBlocks.Height() > 0) + { + tailMergePreds(retryBlocks.Pop()); + } } // Visit each block and try to merge first statements of successors. From c04e7a3af724a21adb9d74df6351a7aa7af50611 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Wed, 3 Jun 2026 22:52:27 +0200 Subject: [PATCH 6/9] * reset to main --- src/coreclr/jit/fgopt.cpp | 277 ++++++++++++++++++++------------------ 1 file changed, 148 insertions(+), 129 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 668695e9d6cea0..e7d171bbc6b70d 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5112,9 +5112,9 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } #endif - struct Candidate + struct PredInfo { - Candidate(BasicBlock* block, Statement* stmt) + PredInfo(BasicBlock* block, Statement* stmt) : m_block(block) , m_stmt(stmt) { @@ -5123,22 +5123,21 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) Statement* m_stmt; }; - jitstd::vector candidates(getAllocator(CMK_ArrayStack)); - ArrayStack retryBlocks(getAllocator(CMK_ArrayStack)); + ArrayStack predInfo(getAllocator(CMK_ArrayStack)); + ArrayStack matchedPredInfo(getAllocator(CMK_ArrayStack)); + ArrayStack retryBlocks(getAllocator(CMK_ArrayStack)); // Try tail merging a block. // If return value is true, retry. // May also add to retryBlocks. 
// - auto tailMerge = [&](BasicBlock* commSucc = nullptr) -> int { - int optimizedCount = 0; - + auto tailMergePreds = [&](BasicBlock* commSucc) -> bool { // Are there enough preds to make it interesting? // - if (candidates.size() < 2) + if (predInfo.Height() < 2) { // Not enough preds to merge - return optimizedCount; + return false; } // If there are large numbers of viable preds, forgo trying to merge. @@ -5147,126 +5146,129 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Note we check this rather than countOfInEdges because we don't care // about dups, just the number of unique pred blocks. // - if (candidates.size() > mergeLimit) + if (predInfo.Height() > mergeLimit) { // Too many preds to consider - return optimizedCount; + return false; } - int matchesStart = 0; - int matchesEnd = 0; - while (matchesEnd < (candidates.size() - 1)) + // Find a matching set of preds. Potentially O(N^2) tree comparisons. + // + int i = 0; + while (i < (predInfo.Height() - 1)) { - // matchesEnd from previous iteration becomes new matchesStart. - // - matchesStart = matchesEnd; - - Candidate candidateA = candidates[matchesStart]; + matchedPredInfo.Reset(); + matchedPredInfo.Emplace(predInfo.TopRef(i)); + Statement* const baseStmt = predInfo.TopRef(i).m_stmt; + BasicBlock* const baseBlock = predInfo.TopRef(i).m_block; - // Find all matching candidates and partition them to be continous in memory. - // The Resulting set is in [matchesStart, matchesEnd) - // + for (int j = i + 1; j < predInfo.Height(); j++) { - auto end = std::partition(candidates.begin() + matchesStart + 1, candidates.end(), - [candidateA](Candidate candidateB) { - // Consider: bypass this for statements that can't cause exceptions. 
- // - if (!BasicBlock::sameEHRegion(candidateA.m_block, candidateB.m_block)) - { - return false; - } + BasicBlock* const otherBlock = predInfo.TopRef(j).m_block; - return GenTree::Compare(candidateA.m_stmt->GetRootNode(), candidateB.m_stmt->GetRootNode()); - }); - matchesEnd = static_cast(std::distance(candidates.begin(), end)); + // Consider: bypass this for statements that can't cause exceptions. + // + if (!BasicBlock::sameEHRegion(baseBlock, otherBlock)) + { + continue; + } + + Statement* const otherStmt = predInfo.TopRef(j).m_stmt; + + // Consider: compute and cache hashes to make this faster + // + if (GenTree::Compare(baseStmt->GetRootNode(), otherStmt->GetRootNode())) + { + matchedPredInfo.Emplace(predInfo.TopRef(j)); + } } - int matchesCount = matchesEnd - matchesStart; - if (matchesCount < 2) + if (matchedPredInfo.Height() < 2) { + // This pred didn't match any other. Check other preds for matches. + i++; continue; } - optimizedCount++; - madeChanges = true; - // We can move the identical last statements to commSucc, if it exists, // and all preds have matching last statements, and we're not changing EH behavior. 
// - bool const hasCommSucc = (commSucc != nullptr); - bool const predsInSameEHRegionAsSucc = - hasCommSucc && BasicBlock::sameEHRegion(candidateA.m_block, commSucc); - bool const canMergeAllPreds = hasCommSucc && (matchesCount == (int)commSucc->countOfInEdges()); + bool const hasCommSucc = (commSucc != nullptr); + bool const predsInSameEHRegionAsSucc = hasCommSucc && BasicBlock::sameEHRegion(baseBlock, commSucc); + bool const canMergeAllPreds = hasCommSucc && (matchedPredInfo.Height() == (int)commSucc->countOfInEdges()); bool const canMergeIntoSucc = predsInSameEHRegionAsSucc && canMergeAllPreds; - if (canMergeIntoSucc) - { - JITDUMP("All %d preds of " FMT_BB " end with the same tree, moving\n", matchesCount, commSucc->bbNum); - } - else if (predsInSameEHRegionAsSucc) - { - JITDUMP("A subset of %d preds of " FMT_BB " end with the same tree\n", matchesCount, commSucc->bbNum); - } - else if (hasCommSucc) - { - JITDUMP("%s %d preds of " FMT_BB " end with the same tree but are in a different EH region\n", - canMergeAllPreds ? 
"All" : "A subset of", matchesCount, commSucc->bbNum); - } - else - { - JITDUMP("A set of %d return/throw blocks end with the same tree\n", matchesCount); - } - JITDUMPEXEC(gtDispStmt(candidates[matchesStart].m_stmt)); if (canMergeIntoSucc) { - // Remove the statement from the preds - // - for (int i = matchesStart; i < matchesEnd; i++) + JITDUMP("All %d preds of " FMT_BB " end with the same tree, moving\n", matchedPredInfo.Height(), + commSucc->bbNum); + JITDUMPEXEC(gtDispStmt(matchedPredInfo.TopRef(0).m_stmt)); + + for (int j = 0; j < matchedPredInfo.Height(); j++) { - Candidate& candidate = candidates[i]; - BasicBlock* const block = candidate.m_block; - Statement* const stmt = candidate.m_stmt; + PredInfo& info = matchedPredInfo.TopRef(j); + Statement* const stmt = info.m_stmt; + BasicBlock* const predBlock = info.m_block; - fgUnlinkStmt(block, stmt); - } + fgUnlinkStmt(predBlock, stmt); - // Add one of the matching stmts to block, and - // update its flags. - // - fgInsertStmtAtBeg(commSucc, candidates[matchesStart].m_stmt); - commSucc->CopyFlags(candidates[matchesStart].m_block, BBF_COPY_PROPAGATE); + // Add one of the matching stmts to block, and + // update its flags. + // + if (j == 0) + { + fgInsertStmtAtBeg(commSucc, stmt); + commSucc->CopyFlags(predBlock, BBF_COPY_PROPAGATE); + } + + madeChanges = true; + } // It's worth retrying tail merge on this block. // - retryBlocks.Push(commSucc); - - continue; + return true; } // All or a subset of preds have matching last stmt, we will cross-jump. // Pick one pred block as the victim -- preferably a block with just one // statement or one that falls through to block (or both). // + if (predsInSameEHRegionAsSucc) + { + JITDUMP("A subset of %d preds of " FMT_BB " end with the same tree\n", matchedPredInfo.Height(), + commSucc->bbNum); + } + else if (commSucc != nullptr) + { + JITDUMP("%s %d preds of " FMT_BB " end with the same tree but are in a different EH region\n", + canMergeAllPreds ? 
"All" : "A subset of", matchedPredInfo.Height(), commSucc->bbNum); + } + else + { + JITDUMP("A set of %d return blocks end with the same tree\n", matchedPredInfo.Height()); + } + + JITDUMPEXEC(gtDispStmt(matchedPredInfo.TopRef(0).m_stmt)); + BasicBlock* crossJumpVictim = nullptr; Statement* crossJumpStmt = nullptr; bool haveNoSplitVictim = false; bool haveFallThroughVictim = false; - for (int i = matchesEnd - 1; i >= matchesStart; i--) + for (PredInfo& info : matchedPredInfo.TopDownOrder()) { - Candidate candidate = candidates[i]; - BasicBlock* const block = candidate.m_block; - Statement* const stmt = candidate.m_stmt; + Statement* const stmt = info.m_stmt; + BasicBlock* const predBlock = info.m_block; // Never pick the init block as the victim as that would // cause us to add a predecessor to it, which is invalid. - if (block == fgFirstBB) + if (predBlock == fgFirstBB) { continue; } - bool const isNoSplit = stmt == block->firstStmt(); - bool const isFallThrough = (block->KindIs(BBJ_ALWAYS) && block->JumpsToNext()); + bool const isNoSplit = stmt == predBlock->firstStmt(); + bool const isFallThrough = (predBlock->KindIs(BBJ_ALWAYS) && predBlock->JumpsToNext()); // Is this block possibly better than what we have? 
// @@ -5294,7 +5296,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) if (useBlock) { - crossJumpVictim = block; + crossJumpVictim = predBlock; crossJumpStmt = stmt; haveNoSplitVictim = isNoSplit; haveFallThroughVictim = isFallThrough; @@ -5327,62 +5329,72 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Do the cross jumping // - for (int i = matchesStart; i < matchesEnd; i++) + for (PredInfo& info : matchedPredInfo.TopDownOrder()) { - Candidate candidate = candidates[i]; - BasicBlock* const block = candidate.m_block; - Statement* const stmt = candidate.m_stmt; + BasicBlock* const predBlock = info.m_block; + Statement* const stmt = info.m_stmt; - if (block == crossJumpVictim) + if (predBlock == crossJumpVictim) { continue; } - fgUnlinkStmt(block, stmt); + // remove the statement + fgUnlinkStmt(predBlock, stmt); // Fix up the flow. // - if (hasCommSucc) + if (commSucc != nullptr) { - assert(block->KindIs(BBJ_ALWAYS)); - fgRedirectEdge(block->TargetEdgeRef(), crossJumpTarget); + assert(predBlock->KindIs(BBJ_ALWAYS)); + fgRedirectEdge(predBlock->TargetEdgeRef(), crossJumpTarget); } else { - FlowEdge* const newEdge = fgAddRefPred(crossJumpTarget, block); - block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); + FlowEdge* const newEdge = fgAddRefPred(crossJumpTarget, predBlock); + predBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); } - // For tail merge we have a common successor of block and + // For tail merge we have a common successor of predBlock and // crossJumpTarget, so the profile update can be done locally. if (crossJumpTarget->hasProfileWeight()) { - crossJumpTarget->increaseBBProfileWeight(block->bbWeight); + crossJumpTarget->increaseBBProfileWeight(predBlock->bbWeight); } } - // It's worth retrying tail merge on this block. + // We changed things // - if (hasCommSucc) - { - retryBlocks.Push(crossJumpTarget); - } + madeChanges = true; + + // We should try tail merging the cross jump target. 
+ // + retryBlocks.Push(crossJumpTarget); + + // Continue trying to merge in the current block. + // This is a bit inefficient, we could remember how + // far we got through the pred list perhaps. + // + return true; } - return optimizedCount; + // We've looked at everything. + // + return false; }; - auto tailMergePreds = [&](BasicBlock* block) -> void { + auto tailMerge = [&](BasicBlock* block) -> bool { if (block->countOfInEdges() < 2) { // Nothing to merge here - return; + return false; } + predInfo.Reset(); + // Find the subset of preds that reach along non-critical edges - // and populate candidates. + // and populate predInfo. // - candidates.clear(); for (BasicBlock* const predBlock : block->PredBlocks()) { if (predBlock->GetUniqueSucc() != block) @@ -5425,22 +5437,33 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // We don't expect to see PHIs but watch for them anyways. // assert(!lastStmt->IsPhiDefnStmt()); - candidates.push_back(Candidate{predBlock, lastStmt}); + predInfo.Emplace(predBlock, lastStmt); + } + + return tailMergePreds(block); + }; + + auto iterateTailMerge = [&](BasicBlock* block) -> void { + int numOpts = 0; + + while (tailMerge(block)) + { + numOpts++; } - int numOpts = tailMerge(block); if (numOpts > 0) { - JITDUMP("Merged %d set of tails going into " FMT_BB "\n", numOpts, block->bbNum); + JITDUMP("Did %d tail merges in " FMT_BB "\n", numOpts, block->bbNum); } }; - // Deduplicate RETURN/THROW blocks. - // This can enable tail-merging so do it first. 
+ ArrayStack retOrThrowBlocks(getAllocator(CMK_ArrayStack)); + + // Visit each block // - candidates.clear(); for (BasicBlock* const block : Blocks()) { + iterateTailMerge(block); if (block->isEmpty()) { continue; @@ -5448,7 +5471,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) if (block->KindIs(BBJ_THROW)) { - candidates.push_back(Candidate{block, block->lastStmt()}); + retOrThrowBlocks.Push(block); } else if (block->KindIs(BBJ_RETURN) && (block != genReturnBB)) { @@ -5465,27 +5488,23 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } } - candidates.push_back(Candidate{block, block->lastStmt()}); + retOrThrowBlocks.Push(block); } } - int numOpts = tailMerge(nullptr); - if (numOpts > 0) + + predInfo.Reset(); + for (BasicBlock* const block : retOrThrowBlocks.BottomUpOrder()) { - JITDUMP("Deduplicated %d sets of return/throw blocks\n", numOpts); + predInfo.Push(PredInfo(block, block->lastStmt())); } - // Tail merge predecessors + tailMergePreds(nullptr); + + // Work through any retries // - for (BasicBlock* const block : Blocks()) + while (retryBlocks.Height() > 0) { - tailMergePreds(block); - - // Work through any retries - // - while (retryBlocks.Height() > 0) - { - tailMergePreds(retryBlocks.Pop()); - } + iterateTailMerge(retryBlocks.Pop()); } // Visit each block and try to merge first statements of successors. @@ -5895,4 +5914,4 @@ PhaseStatus Compiler::fgResolveGDVs() } return madeChanges ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; -} +} \ No newline at end of file From aa0668947805d0947b2754d667f2fbc4a4115f85 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Wed, 3 Jun 2026 23:11:25 +0200 Subject: [PATCH 7/9] * call tailMergePreds until no sets are left --- src/coreclr/jit/fgopt.cpp | 43 ++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index e7d171bbc6b70d..f6f7c5f378fd42 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5492,13 +5492,46 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } } - predInfo.Reset(); - for (BasicBlock* const block : retOrThrowBlocks.BottomUpOrder()) + if (retOrThrowBlocks.Height() > 1) { - predInfo.Push(PredInfo(block, block->lastStmt())); - } + JITDUMP("Trying tail merge of return and throw blocks\n"); + + for (int i = 0; i < retOrThrowBlocks.Height() - 1; i++) + { + BasicBlock* const block = retOrThrowBlocks.TopRef(i); + + // If this block was already merged, skip it + // + if (!block->KindIs(BBJ_RETURN, BBJ_THROW)) + { + continue; + } + + // Regather all candidates + // + predInfo.Reset(); + for (int j = i; j < retOrThrowBlocks.Height(); j++) + { + BasicBlock* const otherBlock = retOrThrowBlocks.TopRef(j); + + // If this block was already merged, skip it + // + if (otherBlock->GetKind() != block->GetKind()) + { + continue; + } + + predInfo.Push(PredInfo(otherBlock, otherBlock->lastStmt())); + } - tailMergePreds(nullptr); + // Do the merging and bail if no sets are left + // + if (!tailMergePreds(nullptr)) + { + break; + } + } + } // Work through any retries // From 8a09dbc57d370282830e32f8c7588fc4d407ba00 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Wed, 3 Jun 2026 23:27:47 +0200 Subject: [PATCH 8/9] * don't bail as it loses diffs in the current state --- src/coreclr/jit/fgopt.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git
a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index f6f7c5f378fd42..3df5feefeec2e7 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5524,12 +5524,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) predInfo.Push(PredInfo(otherBlock, otherBlock->lastStmt())); } - // Do the merging and bail if no sets are left - // - if (!tailMergePreds(nullptr)) - { - break; - } + tailMergePreds(nullptr); } } @@ -5947,4 +5942,4 @@ PhaseStatus Compiler::fgResolveGDVs() } return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; -} \ No newline at end of file +} From ceba75390e39e2ee6bd491e6d85b44b4d0d471b1 Mon Sep 17 00:00:00 2001 From: BoyBaykiller Date: Thu, 4 Jun 2026 00:50:06 +0200 Subject: [PATCH 9/9] * fix impl --- src/coreclr/jit/fgopt.cpp | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 3df5feefeec2e7..5be7aea1445a53 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5492,41 +5492,21 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } } - if (retOrThrowBlocks.Height() > 1) + JITDUMP("Trying tail merge of return and throw blocks\n"); + do { - JITDUMP("Trying tail merge of return and throw blocks\n"); - - for (int i = 0; i < retOrThrowBlocks.Height() - 1; i++) + predInfo.Reset(); + for (BasicBlock* const block : retOrThrowBlocks.BottomUpOrder()) { - BasicBlock* const block = retOrThrowBlocks.TopRef(i); - // If this block was already merged, skip it // if (!block->KindIs(BBJ_RETURN, BBJ_THROW)) { continue; } - - // Regather all candidates - // - predInfo.Reset(); - for (int j = i; j < retOrThrowBlocks.Height(); j++) - { - BasicBlock* const otherBlock = retOrThrowBlocks.TopRef(j); - - // If this block was already merged, skip it - // - if (otherBlock->GetKind() != block->GetKind()) - { - continue; - } - - predInfo.Push(PredInfo(otherBlock, otherBlock->lastStmt())); - } - - 
tailMergePreds(nullptr); + predInfo.Push(PredInfo(block, block->lastStmt())); } - } + } while (tailMergePreds(nullptr)); // Work through any retries //