From 3bdeab2f1b47f8368f57c511dddf902c8d9bd45b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Jun 2026 18:01:09 +0800 Subject: [PATCH 01/18] Precommit tests --- .../LoopVectorize/VPlan/predicator.ll | 82 +++++++++++++++++++ .../Transforms/LoopVectorize/predicator.ll | 69 ++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index 6f33d05b044e6..a9514684b4a9f 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -750,3 +750,85 @@ loop.latch: exit: ret void } + +define void @simplifiable_blend(i1 %c1, i1 %c2, i1 %c3, i32 %x, i32 %y, ptr %p) { +; CHECK-LABEL: VPlan for loop in 'simplifiable_blend' +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]> +; CHECK-NEXT: Successor(s): B +; CHECK-EMPTY: +; CHECK-NEXT: B: +; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%c1> +; CHECK-NEXT: Successor(s): E +; CHECK-EMPTY: +; CHECK-NEXT: E: +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c3> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and vp<[[VP4]]>, vp<[[VP5]]> +; CHECK-NEXT: Successor(s): F +; CHECK-EMPTY: +; CHECK-NEXT: F: +; CHECK-NEXT: Successor(s): A +; CHECK-EMPTY: +; CHECK-NEXT: A: +; CHECK-NEXT: Successor(s): D +; CHECK-EMPTY: +; CHECK-NEXT: D: +; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = not ir<%c2> +; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = logical-and ir<%c1>, vp<[[VP7]]> +; CHECK-NEXT: Successor(s): C +; CHECK-EMPTY: +; CHECK-NEXT: C: +; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = logical-and ir<%c1>, ir<%c2> +; CHECK-NEXT: Successor(s): latch +; CHECK-EMPTY: +; CHECK-NEXT: latch: +; CHECK-NEXT: BLEND ir<%phi> = ir<%y>/vp<[[VP4]]> ir<%x>/vp<[[VP8]]> ir<%x>/vp<[[VP9]]> +; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, 
ir<%iv> +; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep> +; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> +; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + br i1 %c1, label %A, label %B + +A: + br i1 %c2, label %C, label %D + +B: + br i1 %c3, label %F, label %E + +C: + br label %latch + +D: + br label %latch + +E: + br label %F + +F: + br label %latch + +latch: + %phi = phi i32 [ %x, %C ], [ %x, %D ], [ %y, %F ] + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %phi, ptr %gep + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/predicator.ll b/llvm/test/Transforms/LoopVectorize/predicator.ll index 9760de76fc07f..36e6fb778bc3e 100644 --- a/llvm/test/Transforms/LoopVectorize/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/predicator.ll @@ -231,3 +231,72 @@ bb3: exit: ret void } + +define void @simplifiable_blend(i1 %c1, i1 %c2, i1 %c3, i32 %x, i32 %y, ptr %p) { +; CHECK-LABEL: define void @simplifiable_blend( +; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> 
[[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i1> poison, i1 [[C1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT5]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[BROADCAST_SPLAT4]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + br i1 %c1, label %A, label %B + +A: + br i1 %c2, label %C, label %D + +B: + br i1 %c3, label %F, label %E + +C: + br label %latch + +D: + br label %latch + +E: + br 
label %F + +F: + br label %latch + +latch: + %phi = phi i32 [ %x, %C ], [ %x, %D ], [ %y, %F ] + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %phi, ptr %gep + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} From e3b9c3012126d42f1f32f654e0711ee43b5bc9bf Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 3 Jun 2026 23:55:02 +0800 Subject: [PATCH 02/18] Simplify blend masks --- .../include/llvm/Analysis/DominanceFrontier.h | 1 + .../Transforms/Vectorize/VPlanDominatorTree.h | 8 + .../Transforms/Vectorize/VPlanPredicator.cpp | 147 +++++++++++++++++- .../LoopVectorize/VPlan/predicator.ll | 12 +- .../LoopVectorize/predicate-switch.ll | 9 +- .../Transforms/LoopVectorize/predicator.ll | 10 +- .../LoopVectorize/reduction-inloop-pred.ll | 10 +- .../LoopVectorize/reduction-inloop.ll | 21 +-- .../Transforms/LoopVectorize/reduction.ll | 10 +- 9 files changed, 181 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h index fd38891e901e3..4a8ab96cf71a7 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -78,6 +78,7 @@ class DominanceFrontierBase { const_iterator end() const { return Frontiers.end(); } iterator find(BlockT *B) { return Frontiers.find(B); } const_iterator find(BlockT *B) const { return Frontiers.find(B); } + const_iterator find(const BlockT *B) const { return Frontiers.find(B); } /// print - Convert to human readable form /// diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h index 2864670f44913..1ad522880c709 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h +++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -18,6 +18,8 @@ #include "VPlan.h" #include "VPlanCFG.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/Analysis/DominanceFrontier.h" 
+#include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/IR/Dominators.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/GenericDomTreeConstruction.h" @@ -67,5 +69,11 @@ template <> struct GraphTraits : public DomTreeGraphTraitsBase {}; + +class VPPostDominanceFrontier + : public DominanceFrontierBase { +public: + explicit VPPostDominanceFrontier(const DomTreeT &VPDT) { analyze(VPDT); } +}; } // namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 655ac58e24426..2a947df080a09 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -34,6 +34,9 @@ class VPPredicator { /// Post-dominator tree for the VPlan. VPPostDominatorTree VPPDT; + /// Post-dominator frontier for the VPlan. + VPPostDominanceFrontier VPPDF; + /// When we if-convert we need to create edge masks. We have to cache values /// so that we don't end up with exponential recursion/IR. using EdgeMaskCacheTy = @@ -78,8 +81,17 @@ class VPPredicator { return VPBB->getFirstNonPhi(); } + using EdgeTy = std::pair; + + /// Compute the "furthest up" set of edges for each incoming value of \Phi. + MapVector computeBlendEdges(VPPhi *Phi); + + /// Given a set of \p Edges that lead to \p VPBB, return the OR of all edges + /// or an equivalent block in-mask. + VPValue *createMaskDisjunction(ArrayRef Edges, VPBasicBlock *VPBB); + public: - VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan) {} + VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan), VPPDF(VPPDT) {} /// Returns the *entry* mask for \p VPBB. VPValue *getBlockInMask(const VPBasicBlock *VPBB) const { @@ -233,6 +245,115 @@ void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) { setEdgeMask(Src, DefaultDst, DefaultMask); } +// Compute the "furthest up" set of edges for each incoming value of a phi. 
+// +// Start by keeping track of what edges lead to which value. Then see if any +// node has the same value for all outgoing edges. If so then propagate that +// value up to every node it postdominates. +MapVector +VPPredicator::computeBlendEdges(VPPhi *Phi) { + MapVector Edges; + + // Mark the given edge as providing the value \p V. + auto AddEdge = [&Edges](const VPBlockBase *From, const VPBlockBase *To, + VPValue *V) { + EdgeTy Edge = {cast(From), cast(To)}; + assert((!Edges.contains(Edge) || Edges.lookup(Edge) == V) && + "Clobbering an edge?"); + Edges[Edge] = V; + }; + + for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) + AddEdge(InVPBB, Phi->getParent(), InVal); + + // The root phi must postdominate every incoming block. Also don't touch + // phis in a reduction chain since they need to be in a specific structure + // for handle*Reductions. + for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) + if (!VPPDT.dominates(Phi->getParent(), InVPBB) || + isa(InVal)) + return Edges; + + // Given a list of edges, check if they all have the same value and return it. + auto GetAllEqual = [&Edges](ArrayRef OutEdges) -> VPValue * { + VPValue *Common = nullptr; + for (EdgeTy E : OutEdges) { + VPValue *V = Edges.lookup(E); + if (!V) + return nullptr; + if (match(V, m_Poison())) + continue; + if (!Common) + Common = V; + else if (Common != V) + return nullptr; + } + return Common; + }; + + SetVector Worklist(from_range, Phi->incoming_blocks()); + while (!Worklist.empty()) { + auto *VPBB = cast(Worklist.pop_back_val()); + + // Check that all outgoing edges from VPBB have the same value. 
+ SmallVector OutEdges; + for (const VPBlockBase *Succ : VPBB->getSuccessors()) + OutEdges.emplace_back(VPBB, cast(Succ)); + VPValue *Common = GetAllEqual(OutEdges); + if (!Common) + continue; + + // They have the same value: we can move the edges up + for (EdgeTy Edge : OutEdges) + Edges.erase(Edge); + + // Peek through phis that are postdominated by VPBB + if (auto *Phi = dyn_cast(Common)) + if (VPPDT.dominates(VPBB, Phi->getParent())) { + for (auto [InV, InVPBB] : Phi->incoming_values_and_blocks()) { + AddEdge(InVPBB, Phi->getParent(), InV); + Worklist.insert(InVPBB); + } + continue; + } + + // Iterate up through the post dominance frontier + for (const VPBlockBase *Frontier : VPPDF.find(VPBB)->second) { + for (const VPBlockBase *FrontierSucc : Frontier->getSuccessors()) + if (VPPDT.dominates(VPBB, FrontierSucc)) + AddEdge(Frontier, FrontierSucc, Common); + Worklist.insert(cast(Frontier)); + } + } + + return Edges; +} + +VPValue *VPPredicator::createMaskDisjunction(ArrayRef Edges, + VPBasicBlock *VPBB) { + auto Dsts = map_range(Edges, [](auto E) { return E.second; }); + const VPBasicBlock *PostDom = *Dsts.begin(); + for (const VPBasicBlock *VPBB : drop_begin(Dsts)) + PostDom = + cast(VPPDT.findNearestCommonDominator(PostDom, VPBB)); + assert(VPPDT.dominates(VPBB, PostDom) && "Edges don't postdominate VPBB"); + if (PostDom != VPBB) + return getBlockInMask(PostDom); + + VPValue *Mask = nullptr; + for (auto [Src, ConstDst] : Edges) { + auto *Dst = const_cast(ConstDst); + VPValue *EdgeMask; + { + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint(Dst, getMaskInsertPoint(Dst)); + EdgeMask = createEdgeMask(Src, Dst); + } + Mask = Mask ? 
Builder.createOr(Mask, EdgeMask) : EdgeMask; + } + return Mask; +} + void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { Builder.setInsertPoint(VPBB, getMaskInsertPoint(VPBB)); @@ -256,10 +377,30 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { continue; } + MapVector> InValEdgesMap; + for (auto [Edge, Val] : computeBlendEdges(PhiR)) + InValEdgesMap[Val].push_back(Edge); + auto InValEdges = InValEdgesMap.takeVector(); + + if (InValEdges.size() == 1) { + PhiR->replaceAllUsesWith(InValEdges[0].first); + PhiR->eraseFromParent(); + continue; + } + + // Sort the incoming value order to match PhiR as much as possible. + llvm::stable_sort(InValEdges, [&PhiR](auto &L, auto &R) { + auto InVs = PhiR->incoming_values(); + return std::distance(InVs.begin(), find(InVs, L.first)) < + std::distance(InVs.begin(), find(InVs, R.first)); + }); + SmallVector OperandsWithMask; - for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) { + for (const auto &[InVPV, Edges] : InValEdges) { + if (match(InVPV, m_Poison())) + continue; OperandsWithMask.push_back(InVPV); - OperandsWithMask.push_back(createEdgeMask(InVPBB, VPBB)); + OperandsWithMask.push_back(createMaskDisjunction(Edges, VPBB)); } PHINode *IRPhi = cast_or_null(PhiR->getUnderlyingValue()); auto *Blend = diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index a9514684b4a9f..a994367670242 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -309,7 +309,7 @@ define void @switch(ptr %a) { ; CHECK-NEXT: EMIT vp<[[VP13:%[0-9]+]]> = not vp<[[VP12]]> ; CHECK-NEXT: EMIT vp<[[VP14:%[0-9]+]]> = logical-and ir<%c0>, vp<[[VP13]]> ; CHECK-NEXT: EMIT vp<[[VP15:%[0-9]+]]> = or vp<[[VP5]]>, vp<[[VP11]]> -; CHECK-NEXT: BLEND ir<%phi3> = ir<%add2>/vp<[[VP5]]> ir<%add1>/vp<[[VP11]]> ir<%add1>/vp<[[VP11]]> +; CHECK-NEXT: BLEND ir<%phi3> = ir<%add2>/vp<[[VP5]]> 
ir<%add1>/vp<[[VP11]]> ; CHECK-NEXT: EMIT ir<%add3> = add ir<%phi3>, ir<3>, vp<[[VP15]]> ; CHECK-NEXT: Successor(s): bb4 ; CHECK-EMPTY: @@ -665,8 +665,6 @@ define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: Successor(s): merge.a ; CHECK-EMPTY: ; CHECK-NEXT: merge.a: -; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c0> -; CHECK-NEXT: BLEND ir<%blend.a> = ir<%v1>/ir<%c0> ir<%v1>/vp<[[VP5]]> ; CHECK-NEXT: EMIT ir<%d0> = icmp sgt ir<%iv>, ir<0> ; CHECK-NEXT: Successor(s): if.b ; CHECK-EMPTY: @@ -675,16 +673,14 @@ define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: Successor(s): if.b.inner ; CHECK-EMPTY: ; CHECK-NEXT: if.b.inner: -; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb> +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb> ; CHECK-NEXT: Successor(s): merge.b.inner ; CHECK-EMPTY: ; CHECK-NEXT: merge.b.inner: ; CHECK-NEXT: Successor(s): merge.b ; CHECK-EMPTY: ; CHECK-NEXT: merge.b: -; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = not ir<%d0> -; CHECK-NEXT: BLEND ir<%blend.b> = ir<%v2>/ir<%d0> ir<%v2>/vp<[[VP7]]> -; CHECK-NEXT: EMIT ir<%sum> = add ir<%blend.a>, ir<%blend.b> +; CHECK-NEXT: EMIT ir<%sum> = add ir<%v1>, ir<%v2> ; CHECK-NEXT: EMIT store ir<%sum>, ir<%gep> ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: @@ -785,7 +781,7 @@ define void @simplifiable_blend(i1 %c1, i1 %c2, i1 %c3, i32 %x, i32 %y, ptr %p) ; CHECK-NEXT: Successor(s): latch ; CHECK-EMPTY: ; CHECK-NEXT: latch: -; CHECK-NEXT: BLEND ir<%phi> = ir<%y>/vp<[[VP4]]> ir<%x>/vp<[[VP8]]> ir<%x>/vp<[[VP9]]> +; CHECK-NEXT: BLEND ir<%phi> = ir<%y>/vp<[[VP4]]> ir<%x>/ir<%c1> ; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv> ; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep> ; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 96775e0ff082e..d94b1d606e738 100644 --- 
a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -545,8 +545,7 @@ define void @switch_unconditional_duplicate_target(ptr %start, ptr %dest) { ; IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP0]], i64 0 ; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META6:![0-9]+]] ; IC1-NEXT: [[TMP5:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD]], splat (i32 10) -; IC1-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]] -; IC1-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI]] +; IC1-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]] ; IC1-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i64 0 ; IC1-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i64 1 ; IC1-NEXT: store i32 0, ptr [[TMP1]], align 4 @@ -605,12 +604,10 @@ define void @switch_unconditional_duplicate_target(ptr %start, ptr %dest) { ; IC2-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META6]] ; IC2-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD]], splat (i32 10) ; IC2-NEXT: [[TMP10:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD2]], splat (i32 10) -; IC2-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]] -; IC2-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI]] +; IC2-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]] ; IC2-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i64 0 ; IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i64 1 -; IC2-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP1]] -; IC2-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x 
ptr> [[PREDPHI5]] +; IC2-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP1]] ; IC2-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[PREDPHI4]], i64 0 ; IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[PREDPHI4]], i64 1 ; IC2-NEXT: store i32 0, ptr [[TMP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/predicator.ll b/llvm/test/Transforms/LoopVectorize/predicator.ll index 36e6fb778bc3e..57414ae62c341 100644 --- a/llvm/test/Transforms/LoopVectorize/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/predicator.ll @@ -242,15 +242,7 @@ define void @simplifiable_blend(i1 %c1, i1 %c2, i1 %c3, i32 %x, i32 %y, ptr %p) ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C2]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i1> poison, i1 [[C1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT5]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[BROADCAST_SPLAT4]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x 
i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C1]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index c0364c6fc5032..2c5459d472328 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -667,16 +667,14 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] -; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] +; CHECK-NEXT: 
[[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP7]], <4 x float> [[VEC_PHI]] +; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index d9d30f8f3e0a5..7f932a771d55a 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -1097,10 +1097,11 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] -; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] +; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] @@ -1145,14 +1146,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]] +; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP21]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer -; CHECK-INTERLEAVED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP15]], <4 x float> [[TMP17]], <4 x float> [[PREDPHI]] -; CHECK-INTERLEAVED-NEXT: [[PREDPHI6]] = select <4 x i1> [[TMP5]], <4 x float> [[PREDPHI5]], <4 x float> [[VEC_PHI]] -; CHECK-INTERLEAVED-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP22]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP14]] -; CHECK-INTERLEAVED-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP16]], <4 x float> [[TMP18]], <4 x float> [[PREDPHI7]] -; CHECK-INTERLEAVED-NEXT: [[PREDPHI9]] = select <4 x i1> [[TMP6]], <4 x float> [[PREDPHI8]], <4 x float> [[VEC_PHI1]] +; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = or <4 x i1> [[TMP20]], [[TMP27]] +; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = or <4 x i1> [[TMP22]], [[TMP28]] +; CHECK-INTERLEAVED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP25]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[PREDPHI6]] = select <4 x i1> [[TMP15]], <4 x float> [[TMP17]], <4 x float> [[PREDPHI]] +; CHECK-INTERLEAVED-NEXT: 
[[PREDPHI7:%.*]] = select <4 x i1> [[TMP26]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP14]] +; CHECK-INTERLEAVED-NEXT: [[PREDPHI9]] = select <4 x i1> [[TMP16]], <4 x float> [[TMP18]], <4 x float> [[PREDPHI7]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index c683ef897715a..12ddde4698ead 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -761,16 +761,14 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] -; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> 
[[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP7]], <4 x float> [[VEC_PHI]] +; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] From 49eabde306a941e3daefc46d0f22e1c7c1583630 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 5 Jun 2026 18:33:25 +0800 Subject: [PATCH 03/18] Remove unnecessary map_range --- llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 2a947df080a09..711230f0d10a5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -331,9 +331,8 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { VPValue *VPPredicator::createMaskDisjunction(ArrayRef Edges, VPBasicBlock *VPBB) { - auto Dsts = map_range(Edges, [](auto E) { return E.second; }); - const VPBasicBlock *PostDom = *Dsts.begin(); - for (const VPBasicBlock *VPBB : drop_begin(Dsts)) + const VPBasicBlock *PostDom = Edges[0].second; + for (auto [_, VPBB] : drop_begin(Edges)) PostDom = cast(VPPDT.findNearestCommonDominator(PostDom, VPBB)); assert(VPPDT.dominates(VPBB, PostDom) && "Edges don't postdominate VPBB"); From 3724b66ef0f6aa2dd1c90a77a81808780309fadd Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 9 Jun 2026 18:54:40 +0800 Subject: [PATCH 04/18] Address review comments, add ASCII diagram and comments --- .../Transforms/Vectorize/VPlanPredicator.cpp | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp 
b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 711230f0d10a5..086605ee21e75 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -86,8 +86,8 @@ class VPPredicator { /// Compute the "furthest up" set of edges for each incoming value of \Phi. MapVector computeBlendEdges(VPPhi *Phi); - /// Given a set of \p Edges that lead to \p VPBB, return the OR of all edges - /// or an equivalent block in-mask. + /// Given a set of \p Edges that each can reach \p VPBB, return the OR of all + /// edges, or an equivalent block in-mask. VPValue *createMaskDisjunction(ArrayRef Edges, VPBasicBlock *VPBB); public: @@ -245,11 +245,19 @@ void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) { setEdgeMask(Src, DefaultDst, DefaultMask); } -// Compute the "furthest up" set of edges for each incoming value of a phi. -// // Start by keeping track of what edges lead to which value. Then see if any // node has the same value for all outgoing edges. If so then propagate that -// value up to every node it postdominates. +// value up to every node it postdominates. E.g: +// +// Entry Edges = {C->ɸ : %x, D->ɸ : %x, F->ɸ : %y} +// / \ [C,D,F all outgoing edges equal: go up postdom frontier] +// A B ~> {A->C : %x, A->D : %x, Entry->B : %y} +// / \ |\ [A all outgoing edges equal: go up postdom frontier] +// C D | E ~> {Entry->A : %x, Entry->B : %y} +// \ \ |/ +// \ | F +// \ | / +// ɸ = phi [%x, C], [%x, D], [%y, F] MapVector VPPredicator::computeBlendEdges(VPPhi *Phi) { MapVector Edges; @@ -266,9 +274,9 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) AddEdge(InVPBB, Phi->getParent(), InVal); - // The root phi must postdominate every incoming block. Also don't touch - // phis in a reduction chain since they need to be in a specific structure - // for handle*Reductions. + // Only handle phis that postdominate every incoming block. 
Also don't touch + // phis in a reduction chain since they need to be in a specific structure for + // handle*Reductions. for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) if (!VPPDT.dominates(Phi->getParent(), InVPBB) || isa(InVal)) @@ -303,11 +311,11 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { if (!Common) continue; - // They have the same value: we can move the edges up + // They have the same value: we can move the edges up. for (EdgeTy Edge : OutEdges) Edges.erase(Edge); - // Peek through phis that are postdominated by VPBB + // Peek through phis that are postdominated by VPBB. if (auto *Phi = dyn_cast(Common)) if (VPPDT.dominates(VPBB, Phi->getParent())) { for (auto [InV, InVPBB] : Phi->incoming_values_and_blocks()) { @@ -317,7 +325,7 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { continue; } - // Iterate up through the post dominance frontier + // Iterate up through the post dominance frontier. for (const VPBlockBase *Frontier : VPPDF.find(VPBB)->second) { for (const VPBlockBase *FrontierSucc : Frontier->getSuccessors()) if (VPPDT.dominates(VPBB, FrontierSucc)) @@ -331,6 +339,19 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { VPValue *VPPredicator::createMaskDisjunction(ArrayRef Edges, VPBasicBlock *VPBB) { + // If the nearest common postdominator to all of Edges destinations isn't VPBB + // then we can use its block in-mask. E.g: + // + // A ... B + // \ \ / + // \ C + // \ / + // ... D ... + // \ | / + // VPBB + // + // If the edges are A->D and B->C, PostDom will be D. We can reuse Ds block + // in-mask. const VPBasicBlock *PostDom = Edges[0].second; for (auto [_, VPBB] : drop_begin(Edges)) PostDom = @@ -339,6 +360,7 @@ VPValue *VPPredicator::createMaskDisjunction(ArrayRef Edges, if (PostDom != VPBB) return getBlockInMask(PostDom); + // Otherwise, compute the disjunction of edges. 
VPValue *Mask = nullptr; for (auto [Src, ConstDst] : Edges) { auto *Dst = const_cast(ConstDst); From 05ac6a4fec242720b0f2b60a29fbfb8fc66bcd78 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 9 Jun 2026 19:07:06 +0800 Subject: [PATCH 05/18] Remove peeking through phis --- llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 10 ---------- llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll | 8 ++++++-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 086605ee21e75..be3b2f39a9841 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -315,16 +315,6 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { for (EdgeTy Edge : OutEdges) Edges.erase(Edge); - // Peek through phis that are postdominated by VPBB. - if (auto *Phi = dyn_cast(Common)) - if (VPPDT.dominates(VPBB, Phi->getParent())) { - for (auto [InV, InVPBB] : Phi->incoming_values_and_blocks()) { - AddEdge(InVPBB, Phi->getParent(), InV); - Worklist.insert(InVPBB); - } - continue; - } - // Iterate up through the post dominance frontier. 
for (const VPBlockBase *Frontier : VPPDF.find(VPBB)->second) { for (const VPBlockBase *FrontierSucc : Frontier->getSuccessors()) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index a994367670242..74bd3ec92b78f 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -665,6 +665,8 @@ define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: Successor(s): merge.a ; CHECK-EMPTY: ; CHECK-NEXT: merge.a: +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c0> +; CHECK-NEXT: BLEND ir<%blend.a> = ir<%v1>/ir<%c0> ir<%v1>/vp<[[VP5]]> ; CHECK-NEXT: EMIT ir<%d0> = icmp sgt ir<%iv>, ir<0> ; CHECK-NEXT: Successor(s): if.b ; CHECK-EMPTY: @@ -673,14 +675,16 @@ define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: Successor(s): if.b.inner ; CHECK-EMPTY: ; CHECK-NEXT: if.b.inner: -; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb> ; CHECK-NEXT: Successor(s): merge.b.inner ; CHECK-EMPTY: ; CHECK-NEXT: merge.b.inner: ; CHECK-NEXT: Successor(s): merge.b ; CHECK-EMPTY: ; CHECK-NEXT: merge.b: -; CHECK-NEXT: EMIT ir<%sum> = add ir<%v1>, ir<%v2> +; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = not ir<%d0> +; CHECK-NEXT: BLEND ir<%blend.b> = ir<%v2>/ir<%d0> ir<%v2>/vp<[[VP7]]> +; CHECK-NEXT: EMIT ir<%sum> = add ir<%blend.a>, ir<%blend.b> ; CHECK-NEXT: EMIT store ir<%sum>, ir<%gep> ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: From de9c116679a1c08986fb8e9c69d12df01c1718f5 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 10 Jun 2026 15:45:28 +0800 Subject: [PATCH 06/18] Remove reduction phi check We don't need it for now --- llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index be3b2f39a9841..05c5d4a8e308e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -274,12 +274,9 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) AddEdge(InVPBB, Phi->getParent(), InVal); - // Only handle phis that postdominate every incoming block. Also don't touch - // phis in a reduction chain since they need to be in a specific structure for - // handle*Reductions. - for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) - if (!VPPDT.dominates(Phi->getParent(), InVPBB) || - isa(InVal)) + // Only handle phis that postdominate every incoming block. + for (const VPBlockBase *InVPBB : Phi->incoming_blocks()) + if (!VPPDT.dominates(Phi->getParent(), InVPBB)) return Edges; // Given a list of edges, check if they all have the same value and return it. 
From d458213e69ac84cb4b209b1185579d58191236f9 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 10 Jun 2026 17:38:38 +0800 Subject: [PATCH 07/18] Fix assertion comment --- llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 05c5d4a8e308e..4feb8d44b80f6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -343,7 +343,7 @@ VPValue *VPPredicator::createMaskDisjunction(ArrayRef Edges, for (auto [_, VPBB] : drop_begin(Edges)) PostDom = cast(VPPDT.findNearestCommonDominator(PostDom, VPBB)); - assert(VPPDT.dominates(VPBB, PostDom) && "Edges don't postdominate VPBB"); + assert(VPPDT.dominates(VPBB, PostDom) && "VPBB doesn't postdominate edges"); if (PostDom != VPBB) return getBlockInMask(PostDom); From 0b553a5201ce649021ad88e9b0eb05aa3f12f3fc Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 10 Jun 2026 17:39:02 +0800 Subject: [PATCH 08/18] Remove poison coalescing for now until we have test cases --- llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 4feb8d44b80f6..e4891a6a86d8b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -286,8 +286,6 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { VPValue *V = Edges.lookup(E); if (!V) return nullptr; - if (match(V, m_Poison())) - continue; if (!Common) Common = V; else if (Common != V) @@ -390,12 +388,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { InValEdgesMap[Val].push_back(Edge); auto InValEdges = InValEdgesMap.takeVector(); - if (InValEdges.size() == 1) { - PhiR->replaceAllUsesWith(InValEdges[0].first); - PhiR->eraseFromParent(); - 
continue; - } - // Sort the incoming value order to match PhiR as much as possible. llvm::stable_sort(InValEdges, [&PhiR](auto &L, auto &R) { auto InVs = PhiR->incoming_values(); @@ -405,8 +397,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { SmallVector OperandsWithMask; for (const auto &[InVPV, Edges] : InValEdges) { - if (match(InVPV, m_Poison())) - continue; OperandsWithMask.push_back(InVPV); OperandsWithMask.push_back(createMaskDisjunction(Edges, VPBB)); } From a7081b6607724e78aa7409920585866ec2b147ca Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 17 Jun 2026 11:52:21 +0200 Subject: [PATCH 09/18] Address review comments * Reuse previous method in DominanceFrontier * Replace GetAllEqual with a map_range --- .../include/llvm/Analysis/DominanceFrontier.h | 1 - .../Transforms/Vectorize/VPlanPredicator.cpp | 21 ++++--------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h index 4a8ab96cf71a7..163914070d275 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -77,7 +77,6 @@ class DominanceFrontierBase { iterator end() { return Frontiers.end(); } const_iterator end() const { return Frontiers.end(); } iterator find(BlockT *B) { return Frontiers.find(B); } - const_iterator find(BlockT *B) const { return Frontiers.find(B); } const_iterator find(const BlockT *B) const { return Frontiers.find(B); } /// print - Convert to human readable form diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index e4891a6a86d8b..4396ebb0e58f4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -279,21 +279,6 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { if (!VPPDT.dominates(Phi->getParent(), InVPBB)) return Edges; - // Given a list of edges, check if they 
all have the same value and return it. - auto GetAllEqual = [&Edges](ArrayRef OutEdges) -> VPValue * { - VPValue *Common = nullptr; - for (EdgeTy E : OutEdges) { - VPValue *V = Edges.lookup(E); - if (!V) - return nullptr; - if (!Common) - Common = V; - else if (Common != V) - return nullptr; - } - return Common; - }; - SetVector Worklist(from_range, Phi->incoming_blocks()); while (!Worklist.empty()) { auto *VPBB = cast(Worklist.pop_back_val()); @@ -302,8 +287,10 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { SmallVector OutEdges; for (const VPBlockBase *Succ : VPBB->getSuccessors()) OutEdges.emplace_back(VPBB, cast(Succ)); - VPValue *Common = GetAllEqual(OutEdges); - if (!Common) + auto OutVals = + map_range(OutEdges, [&Edges](EdgeTy E) { return Edges.lookup(E); }); + VPValue *Common = *OutVals.begin(); + if (!Common || !all_equal(OutVals)) continue; // They have the same value: we can move the edges up. From f164a9f60fa79c59ed9258d54b9b0b0c17b8fceb Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 17 Jun 2026 12:42:13 +0200 Subject: [PATCH 10/18] Remove phi must postdominate each incoming block restriction After thinking about this for a bit this isn't needed. If a phi doesn't postdominate an incoming block, the incoming block will have an outgoing edge with no value. So we won't propagate any further up that incoming block anyway. What differs between this approach and https://github.com/llvm/llvm-project/pull/184838 is that the latter performs a full inverse DFS to see what blocks are reachable, whereas this just checks that the incoming values are the same at each postdominance frontier. The test case phi_doesnt_postdom_incoming shows a scenario where the full inverse DFS approach could simplify the edge to just c1 and !c1, but we calculate the conservative (but still correct) edges in this PR. 
--- .../Transforms/Vectorize/VPlanPredicator.cpp | 5 -- .../LoopVectorize/VPlan/predicator.ll | 68 +++++++++++++++++++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 4396ebb0e58f4..c4ddcc9142bb3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -274,11 +274,6 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) AddEdge(InVPBB, Phi->getParent(), InVal); - // Only handle phis that postdominate every incoming block. - for (const VPBlockBase *InVPBB : Phi->incoming_blocks()) - if (!VPPDT.dominates(Phi->getParent(), InVPBB)) - return Edges; - SetVector Worklist(from_range, Phi->incoming_blocks()); while (!Worklist.empty()) { auto *VPBB = cast(Worklist.pop_back_val()); diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index 74bd3ec92b78f..e868ad53dd587 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -832,3 +832,71 @@ latch: exit: ret void } + +; loop +; / \ +; A B +; / \ / +; \ C +; \ | +; latch +define void @phi_doesnt_postdom_incoming(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { +; CHECK-LABEL: VPlan for loop in 'phi_doesnt_postdom_incoming' +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]> +; CHECK-NEXT: Successor(s): B +; CHECK-EMPTY: +; CHECK-NEXT: B: +; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%c1> +; CHECK-NEXT: Successor(s): A +; CHECK-EMPTY: +; CHECK-NEXT: A: +; CHECK-NEXT: Successor(s): C +; CHECK-EMPTY: +; CHECK-NEXT: C: +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = logical-and ir<%c1>, ir<%c2> +; CHECK-NEXT: 
EMIT vp<[[VP6:%[0-9]+]]> = or vp<[[VP4]]>, vp<[[VP5]]> +; CHECK-NEXT: BLEND ir<%phi> = ir<%y>/vp<[[VP4]]> ir<%x>/vp<[[VP5]]> +; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv> +; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>, vp<[[VP6]]> +; CHECK-NEXT: Successor(s): latch +; CHECK-EMPTY: +; CHECK-NEXT: latch: +; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> +; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + br i1 %c1, label %A, label %B + +A: + br i1 %c2, label %C, label %latch + +B: + br label %C + +C: + %phi = phi i32 [ %x, %A ], [ %y, %B ] + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %phi, ptr %gep + br label %latch + +latch: + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} From af139d858c351034fdbb93833b402f00b922d8aa Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 17 Jun 2026 13:05:37 +0200 Subject: [PATCH 11/18] Add test case for switch with duplicate edges --- .../LoopVectorize/VPlan/predicator.ll | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index e868ad53dd587..483ab8ad94b50 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -900,3 +900,62 @@ latch: exit: ret void } + + +; loop +; / \ +; /\ / +; \ bb0 +; \ | +; latch +define void @phi_switch_same_edge(ptr %p) { +; CHECK-LABEL: VPlan for loop in 'phi_switch_same_edge' +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; 
CHECK-NEXT: vector.body: +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]> +; CHECK-NEXT: Successor(s): bb0 +; CHECK-EMPTY: +; CHECK-NEXT: bb0: +; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = icmp eq ir<%iv>, ir<0> +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = icmp eq ir<%iv>, ir<1> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = or vp<[[VP4]]>, vp<[[VP5]]> +; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = not vp<[[VP6]]> +; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv> +; CHECK-NEXT: EMIT store ir<0>, ir<%gep>, vp<[[VP6]]> +; CHECK-NEXT: Successor(s): latch +; CHECK-EMPTY: +; CHECK-NEXT: latch: +; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> +; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + switch i32 %iv, label %latch [ + i32 0, label %bb0 + i32 1, label %bb0 + ] + +bb0: + %phi = phi i32 [ 0, %loop ], [ 0, %loop ] + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %phi, ptr %gep + br label %latch + +latch: + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} From 25b8c281861d976cbd0992b5566d87d3e6fd9119 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 17 Jun 2026 13:07:06 +0200 Subject: [PATCH 12/18] Rename createMaskDisjunction --- llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index c4ddcc9142bb3..e1fe727b216b7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -88,7 +88,7 @@ class 
VPPredicator { /// Given a set of \p Edges that each can reach \p VPBB, return the OR of all /// edges, or an equivalent block in-mask. - VPValue *createMaskDisjunction(ArrayRef Edges, VPBasicBlock *VPBB); + VPValue *createBlendMaskForEdges(ArrayRef Edges, VPBasicBlock *VPBB); public: VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan), VPPDF(VPPDT) {} @@ -304,8 +304,8 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { return Edges; } -VPValue *VPPredicator::createMaskDisjunction(ArrayRef Edges, - VPBasicBlock *VPBB) { +VPValue *VPPredicator::createBlendMaskForEdges(ArrayRef Edges, + VPBasicBlock *VPBB) { // If the nearest common postdominator to all of Edges destinations isn't VPBB // then we can use its block in-mask. E.g: // @@ -380,7 +380,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { SmallVector OperandsWithMask; for (const auto &[InVPV, Edges] : InValEdges) { OperandsWithMask.push_back(InVPV); - OperandsWithMask.push_back(createMaskDisjunction(Edges, VPBB)); + OperandsWithMask.push_back(createBlendMaskForEdges(Edges, VPBB)); } PHINode *IRPhi = cast_or_null(PhiR->getUnderlyingValue()); auto *Blend = From a30cf054bed8f0cef9497668e363913ef455916c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 9 Jun 2026 20:10:59 +0800 Subject: [PATCH 13/18] Precommit tests --- .../LoopVectorize/VPlan/predicator.ll | 79 +++++++++++++++++++ .../Transforms/LoopVectorize/predicator.ll | 72 +++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index 483ab8ad94b50..d9766bbc5c4c7 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -959,3 +959,82 @@ latch: exit: ret void } + +; loop +; / \ +; A \ +; / \ B +; \ \ / +; C D +; \/ +; latch +define void @look_thru_phi(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { +; CHECK-LABEL: VPlan for loop in 'look_thru_phi' +; 
CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]> +; CHECK-NEXT: Successor(s): B +; CHECK-EMPTY: +; CHECK-NEXT: B: +; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%c1> +; CHECK-NEXT: Successor(s): A +; CHECK-EMPTY: +; CHECK-NEXT: A: +; CHECK-NEXT: Successor(s): D +; CHECK-EMPTY: +; CHECK-NEXT: D: +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c2> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%c1>, vp<[[VP5]]> +; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = or vp<[[VP4]]>, vp<[[VP6]]> +; CHECK-NEXT: BLEND ir<%phi1> = ir<%y>/vp<[[VP4]]> ir<%x>/vp<[[VP6]]> +; CHECK-NEXT: Successor(s): C +; CHECK-EMPTY: +; CHECK-NEXT: C: +; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = logical-and ir<%c1>, ir<%c2> +; CHECK-NEXT: Successor(s): latch +; CHECK-EMPTY: +; CHECK-NEXT: latch: +; CHECK-NEXT: BLEND ir<%phi> = ir<%phi1>/vp<[[VP7]]> ir<%x>/vp<[[VP8]]> +; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv> +; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep> +; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> +; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + br i1 %c1, label %A, label %B + +A: + br i1 %c2, label %C, label %D + +B: + br label %D + +C: + br label %latch + +D: + %phi1 = phi i32 [%x, %A], [%y, %B] + br label %latch + +latch: + %phi = phi i32 [ %x, %C ], [ %phi1, %D ] + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %phi, ptr %gep + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git 
a/llvm/test/Transforms/LoopVectorize/predicator.ll b/llvm/test/Transforms/LoopVectorize/predicator.ll index 57414ae62c341..a03855edd8de7 100644 --- a/llvm/test/Transforms/LoopVectorize/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/predicator.ll @@ -292,3 +292,75 @@ latch: exit: ret void } + +; loop +; / \ +; A \ +; / \ B +; \ \ / +; C D +; \/ +; latch +define void @look_thru_phi(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { +; CHECK-LABEL: define void @look_thru_phi( +; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i1> poison, i1 [[C1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT5]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> 
[[BROADCAST_SPLAT4]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + br i1 %c1, label %A, label %B + +A: + br i1 %c2, label %C, label %D + +B: + br label %D + +C: + br label %latch + +D: + %phi1 = phi i32 [%x, %A], [%y, %B] + br label %latch + +latch: + %phi = phi i32 [ %x, %C ], [ %phi1, %D ] + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %phi, ptr %gep + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} From 1f862ec286c08ddf7c309a92c5952108929696cc Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 9 Jun 2026 20:12:38 +0800 Subject: [PATCH 14/18] Peek through inner phi's nested values --- .../Transforms/Vectorize/VPlanPredicator.cpp | 21 +++++++++++++++++++ .../LoopVectorize/VPlan/predicator.ll | 10 +++------ .../Transforms/LoopVectorize/predicator.ll | 10 +-------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index e1fe727b216b7..a124240b49366 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ 
b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -274,6 +274,10 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { for (auto [InVal, InVPBB] : Phi->incoming_values_and_blocks()) AddEdge(InVPBB, Phi->getParent(), InVal); + // Don't optimize any reduction chains for now. + if (any_of(Phi->incoming_values(), IsaPred)) + return Edges; + SetVector Worklist(from_range, Phi->incoming_blocks()); while (!Worklist.empty()) { auto *VPBB = cast(Worklist.pop_back_val()); @@ -292,6 +296,17 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { for (EdgeTy Edge : OutEdges) Edges.erase(Edge); + // If the value is a phi postdominated by VPBB, then look through the inner + // incoming values instead of propagating the phi. + if (auto *Phi = dyn_cast(Common)) + if (VPPDT.dominates(VPBB, Phi->getParent())) { + for (auto [InV, InVPBB] : Phi->incoming_values_and_blocks()) { + AddEdge(InVPBB, Phi->getParent(), InV); + Worklist.insert(InVPBB); + } + continue; + } + // Iterate up through the post dominance frontier. for (const VPBlockBase *Frontier : VPPDF.find(VPBB)->second) { for (const VPBlockBase *FrontierSucc : Frontier->getSuccessors()) @@ -370,6 +385,12 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { InValEdgesMap[Val].push_back(Edge); auto InValEdges = InValEdgesMap.takeVector(); + if (InValEdges.size() == 1) { + PhiR->replaceAllUsesWith(InValEdges[0].first); + PhiR->eraseFromParent(); + continue; + } + // Sort the incoming value order to match PhiR as much as possible. 
llvm::stable_sort(InValEdges, [&PhiR](auto &L, auto &R) { auto InVs = PhiR->incoming_values(); diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index d9766bbc5c4c7..84f681958550f 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -665,8 +665,6 @@ define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: Successor(s): merge.a ; CHECK-EMPTY: ; CHECK-NEXT: merge.a: -; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c0> -; CHECK-NEXT: BLEND ir<%blend.a> = ir<%v1>/ir<%c0> ir<%v1>/vp<[[VP5]]> ; CHECK-NEXT: EMIT ir<%d0> = icmp sgt ir<%iv>, ir<0> ; CHECK-NEXT: Successor(s): if.b ; CHECK-EMPTY: @@ -675,16 +673,14 @@ define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: Successor(s): if.b.inner ; CHECK-EMPTY: ; CHECK-NEXT: if.b.inner: -; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb> +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb> ; CHECK-NEXT: Successor(s): merge.b.inner ; CHECK-EMPTY: ; CHECK-NEXT: merge.b.inner: ; CHECK-NEXT: Successor(s): merge.b ; CHECK-EMPTY: ; CHECK-NEXT: merge.b: -; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = not ir<%d0> -; CHECK-NEXT: BLEND ir<%blend.b> = ir<%v2>/ir<%d0> ir<%v2>/vp<[[VP7]]> -; CHECK-NEXT: EMIT ir<%sum> = add ir<%blend.a>, ir<%blend.b> +; CHECK-NEXT: EMIT ir<%sum> = add ir<%v1>, ir<%v2> ; CHECK-NEXT: EMIT store ir<%sum>, ir<%gep> ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: @@ -996,7 +992,7 @@ define void @look_thru_phi(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: Successor(s): latch ; CHECK-EMPTY: ; CHECK-NEXT: latch: -; CHECK-NEXT: BLEND ir<%phi> = ir<%phi1>/vp<[[VP7]]> ir<%x>/vp<[[VP8]]> +; CHECK-NEXT: BLEND ir<%phi> = ir<%x>/ir<%c1> ir<%y>/vp<[[VP4]]> ; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv> ; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep> ; 
CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/predicator.ll b/llvm/test/Transforms/LoopVectorize/predicator.ll index a03855edd8de7..98529cb1c227d 100644 --- a/llvm/test/Transforms/LoopVectorize/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/predicator.ll @@ -311,15 +311,7 @@ define void @look_thru_phi(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C2]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i1> poison, i1 [[C1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT5]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT6]], <4 x i1> [[BROADCAST_SPLAT4]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C1]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: 
[[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] From 31255ff7713b1081b38bdc211f155670c5487e4b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 19 Jun 2026 13:33:57 +0200 Subject: [PATCH 15/18] Don't peek thru phis with multiple uses --- .../Transforms/Vectorize/VPlanPredicator.cpp | 2 +- .../LoopVectorize/VPlan/predicator.ll | 76 +++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index a124240b49366..0d88b96f86a8b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -299,7 +299,7 @@ VPPredicator::computeBlendEdges(VPPhi *Phi) { // If the value is a phi postdominated by VPBB, then look through the inner // incoming values instead of propagating the phi. if (auto *Phi = dyn_cast(Common)) - if (VPPDT.dominates(VPBB, Phi->getParent())) { + if (Phi->hasOneUse() && VPPDT.dominates(VPBB, Phi->getParent())) { for (auto [InV, InVPBB] : Phi->incoming_values_and_blocks()) { AddEdge(InVPBB, Phi->getParent(), InV); Worklist.insert(InVPBB); diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index 84f681958550f..6359e2e0d4200 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -1034,3 +1034,79 @@ latch: exit: ret void } + + +; Same as the test above, but shouldn't look thru the phi because it has +; multiple uses. 
+define void @look_thru_phi_multi_use(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { +; CHECK-LABEL: VPlan for loop in 'look_thru_phi_multi_use' +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]> +; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv> +; CHECK-NEXT: Successor(s): B +; CHECK-EMPTY: +; CHECK-NEXT: B: +; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%c1> +; CHECK-NEXT: Successor(s): A +; CHECK-EMPTY: +; CHECK-NEXT: A: +; CHECK-NEXT: Successor(s): D +; CHECK-EMPTY: +; CHECK-NEXT: D: +; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c2> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%c1>, vp<[[VP5]]> +; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = or vp<[[VP4]]>, vp<[[VP6]]> +; CHECK-NEXT: BLEND ir<%phi1> = ir<%y>/vp<[[VP4]]> ir<%x>/vp<[[VP6]]> +; CHECK-NEXT: EMIT store ir<%phi1>, ir<%gep>, vp<[[VP7]]> +; CHECK-NEXT: Successor(s): C +; CHECK-EMPTY: +; CHECK-NEXT: C: +; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = logical-and ir<%c1>, ir<%c2> +; CHECK-NEXT: Successor(s): latch +; CHECK-EMPTY: +; CHECK-NEXT: latch: +; CHECK-NEXT: BLEND ir<%phi> = ir<%phi1>/vp<[[VP7]]> ir<%x>/vp<[[VP8]]> +; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep> +; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1> +; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %gep = getelementptr i32, ptr %p, i32 %iv + br i1 %c1, label %A, label %B + +A: + br i1 %c2, label %C, label %D + +B: + br label %D + +C: + br label %latch + +D: + %phi1 = phi i32 [%x, %A], [%y, %B] + store i32 %phi1, ptr %gep + br label %latch + 
+latch: + %phi = phi i32 [ %x, %C ], [ %phi1, %D ] + store i32 %phi, ptr %gep + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} From 77cdce87334f599d6a3bfc70b4eeae69df8af654 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 19 Jun 2026 13:59:59 +0200 Subject: [PATCH 16/18] Move diagram below check lines --- .../Transforms/LoopVectorize/VPlan/predicator.ll | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll index 6359e2e0d4200..37f03b102c6e4 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll @@ -956,14 +956,6 @@ exit: ret void } -; loop -; / \ -; A \ -; / \ B -; \ \ / -; C D -; \/ -; latch define void @look_thru_phi(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { ; CHECK-LABEL: VPlan for loop in 'look_thru_phi' ; CHECK-NEXT: vector loop: { @@ -1003,6 +995,14 @@ define void @look_thru_phi(i1 %c1, i1 %c2, i32 %x, i32 %y, ptr %p) { ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; +; loop +; / \ +; A \ +; / \ B +; \ \ / +; C D +; \/ +; latch entry: br label %loop From 415c26bdd7cfe72be7b61938b928be78a9da149a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 4 Jun 2026 13:10:07 +0800 Subject: [PATCH 17/18] Maintain SSA by replacing MaskedCond with phis --- llvm/lib/Transforms/Vectorize/VPlan.h | 3 - .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 - .../Transforms/Vectorize/VPlanTransforms.cpp | 97 +++++++++++++++---- .../Transforms/Vectorize/VPlanVerifier.cpp | 18 ---- .../predicated-early-exits-interleave.ll | 18 ++-- .../predicated-multiple-exits.ll | 38 +++----- .../LoopVectorize/single_early_exit.ll | 62 ++++++++++++ 7 files changed, 162 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 
bd7f87dbb3a5d..1ec24da4641e0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1381,9 +1381,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, /// Returns true if the VPInstruction does not need masking. bool alwaysUnmasked() const { - if (Opcode == VPInstruction::MaskedCond) - return false; - // For now only VPInstructions with underlying values use masks. // TODO: provide masks to VPInstructions w/o underlying values. if (!getUnderlyingValue()) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f45b9e4f6c35b..2635e37a85518 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -641,7 +641,6 @@ unsigned VPInstruction::getNumOperandsForOpcode() const { case VPInstruction::ExtractLastLane: case VPInstruction::ExtractLastPart: case VPInstruction::ExtractPenultimateElement: - case VPInstruction::MaskedCond: case VPInstruction::Not: case VPInstruction::Reverse: case VPInstruction::Unpack: @@ -1630,7 +1629,6 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::LogicalAnd: case VPInstruction::LogicalOr: - case VPInstruction::MaskedCond: case VPInstruction::Not: case VPInstruction::PtrAdd: case VPInstruction::WideIVStep: @@ -1780,9 +1778,6 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent, case VPInstruction::ExitingIVValue: O << "exiting-iv-value"; break; - case VPInstruction::MaskedCond: - O << "masked-cond"; - break; case VPInstruction::ExtractLane: O << "extract-lane"; break; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cfe21de1c7595..7204450936504 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1690,11 +1690,6 @@ static void 
simplifyRecipe(VPSingleDefRecipe *Def) { return; } - // Simplify MaskedCond with no block mask to its single operand. - if (match(Def, m_VPInstruction()) && - !cast(Def)->isMasked()) - return Def->replaceAllUsesWith(Def->getOperand(0)); - // Look through ExtractLastLane. if (match(Def, m_ExtractLastLane(m_VPValue(A)))) { if (match(A, m_BuildVector())) { @@ -4215,17 +4210,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { continue; } - // Lower MaskedCond with block mask to LogicalAnd. - if (match(&R, m_VPInstruction())) { - auto *VPI = cast(&R); - assert(VPI->isMasked() && - "Unmasked MaskedCond should be simplified earlier"); - VPI->replaceAllUsesWith(Builder.createNaryOp( - VPInstruction::LogicalAnd, {VPI->getMask(), VPI->getOperand(0)})); - VPI->eraseFromParent(); - continue; - } - // Lower CanonicalIVIncrementForPart to plain Add. if (match( &R, @@ -4410,6 +4394,74 @@ struct EarlyExitInfo { VPIRBasicBlock *EarlyExitVPBB; VPValue *CondToExit; }; +static VPValue *repairSSA(VPValue *Src, VPBasicBlock *SrcVPBB, VPValue *Other, + VPBasicBlock *VPBB, VPDominatorTree &VPDT, + DenseMap &Phis) { + + if (VPDT.dominates(SrcVPBB, VPBB)) + return Src; + if (VPDT.dominates(VPBB, SrcVPBB)) + return Other; + if (VPPhi *Phi = Phis.lookup(VPBB)) + return Phi; + + SmallVector InVals; + for (auto *Pred : VPBB->predecessors()) + InVals.push_back( + repairSSA(Src, SrcVPBB, Other, cast(Pred), VPDT, Phis)); + if (all_equal(InVals)) + return InVals[0]; + + VPPhi *Phi = VPBuilder(VPBB, VPBB->getFirstNonPhi()).createScalarPhi(InVals); + Phis[VPBB] = Phi; + return Phi; +} + +/// Insert phi nodes to maintain SSA starting from \p VPBB, such that the +/// resulting value is \p \Src on all paths that go through \p SrcVPBB, and \p +/// Other otherwise. 
+static VPValue *repairSSA(VPValue *Src, VPBasicBlock *SrcVPBB, VPValue *Other, + VPBasicBlock *VPBB, VPDominatorTree &VPDT) { + DenseMap Phis; + return repairSSA(Src, SrcVPBB, Other, VPBB, VPDT, Phis); +} + +// After handling early exits, the CondToExits and live outs may no longer be in +// SSA if their defining blocks are predicated, so insert phis to repair them. +static void repairEarlyExitSSA(VPlan &Plan, VPDominatorTree &VPDT, + ArrayRef Exits, + VPBasicBlock *LatchVPBB, + ArrayRef LiveOutVPBBs) { + // Repair all CondToExits. The condition is false on any path that doesn't go + // through the exiting block. + for (auto [EarlyExitingVPBB, _, CondToExit] : Exits) { + VPValue *Repaired = repairSSA(CondToExit, EarlyExitingVPBB, Plan.getFalse(), + LatchVPBB, VPDT); + + CondToExit->replaceUsesWithIf(Repaired, [&](VPUser &U, unsigned I) { + auto &R = cast(U); + return VPDT.dominates(LatchVPBB, R.getParent()) && + R.getVPSingleValue() != Repaired; + }); + } + + // Repair any live outs. The value is poison on any path that didn't pass + // through the def's block. + for (VPBasicBlock *LiveOutVPBB : LiveOutVPBBs) + for (VPRecipeBase &R : *LiveOutVPBB) { + VPValue *LiveOut; + if (!match(&R, + m_CombineOr(m_ExtractLastPart(m_VPValue(LiveOut)), + m_ExtractLane(m_VPValue(), m_VPValue(LiveOut))))) + continue; + VPValue *Poison = + Plan.getOrAddLiveIn(PoisonValue::get(LiveOut->getScalarType())); + VPValue *Repaired = + repairSSA(LiveOut, LiveOut->getDefiningRecipe()->getParent(), Poison, + LatchVPBB, VPDT); + R.replaceUsesOfWith(LiveOut, Repaired); + } +} /// Update \p Plan to mask memory operations in the loop based on whether the /// early exit is taken or not. @@ -4615,14 +4667,12 @@ bool VPlanTransforms::handleUncountableEarlyExits( m_BranchOnCond(m_VPValue(CondOfEarlyExitingVPBB))); assert(Matched && "Terminator must be BranchOnCond"); - // Insert the MaskedCond in the EarlyExitingVPBB so the predicator adds - // the correct block mask. 
VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator()); - auto *CondToEarlyExit = EarlyExitingBuilder.createNaryOp( - VPInstruction::MaskedCond, + auto *CondToEarlyExit = TrueSucc == ExitBlock ? CondOfEarlyExitingVPBB - : EarlyExitingBuilder.createNot(CondOfEarlyExitingVPBB)); + : EarlyExitingBuilder.createNot(CondOfEarlyExitingVPBB); + assert((isa(CondOfEarlyExitingVPBB) || !VPDT.properlyDominates(EarlyExitingVPBB, LatchVPBB) || VPDT.properlyDominates( @@ -4819,6 +4869,11 @@ bool VPlanTransforms::handleUncountableEarlyExits( DispatchBuilder.setInsertPoint(CurrentBB); } + VPDT.recalculate(Plan); + SmallVector LiveOutVPBBs = {MiddleVPBB}; + append_range(LiveOutVPBBs, VectorEarlyExitVPBBs); + repairEarlyExitSSA(Plan, VPDT, Exits, LatchVPBB, LiveOutVPBBs); + return true; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 362bfe92f573e..bfe29a57e8f72 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -246,11 +246,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { return false; } - // MaskedCond may be used from blocks it don't dominate; the block will be - // linearized and it will dominate its users after linearization. - if (match(&R, m_VPInstruction())) - continue; - for (const VPUser *U : V->users()) { auto *UI = cast(U); if (isa(UI) && @@ -295,19 +290,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { continue; } - // Recipes in blocks with a MaskedCond may be used in exit blocks; the - // block will be linearized and its recipes will dominate their users - // after linearization. 
- bool BlockHasMaskedCond = any_of(*VPBB, [](const VPRecipeBase &R) { - return match(&R, m_VPInstruction()); - }); - if (BlockHasMaskedCond && - any_of(VPBB->getPlan()->getExitBlocks(), [UI](VPIRBasicBlock *EB) { - return is_contained(EB->getPredecessors(), UI->getParent()); - })) { - continue; - } - errs() << "Use before def!\n"; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) VPSlotTracker Tracker(VPBB->getPlan()); diff --git a/llvm/test/Transforms/LoopVectorize/predicated-early-exits-interleave.ll b/llvm/test/Transforms/LoopVectorize/predicated-early-exits-interleave.ll index df24b2838f6c4..d07ea380f55b8 100644 --- a/llvm/test/Transforms/LoopVectorize/predicated-early-exits-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/predicated-early-exits-interleave.ll @@ -185,10 +185,8 @@ define i64 @three_early_exits() { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[GEP_A]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42) -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD1]], splat (i8 -42) -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <4 x i8> [[WIDE_LOAD]], splat (i8 -42) +; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i8> [[WIDE_LOAD1]], splat (i8 -42) ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42) ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD1]], splat (i8 42) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) @@ -201,8 +199,6 @@ define i64 @three_early_exits() { ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], 
[[WIDE_LOAD3]] -; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP15]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] @@ -211,16 +207,18 @@ define i64 @three_early_exits() { ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD5]] -; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP23]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP26]], align 1 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1 ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD6]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD7]] -; CHECK-NEXT: [[TMP30:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP15]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = select <4 x 
i1> [[TMP19]], <4 x i1> [[TMP23]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> zeroinitializer, <4 x i1> [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> zeroinitializer, <4 x i1> [[TMP29]] ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP24]] ; CHECK-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP25]] ; CHECK-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP32]], <4 x i1> splat (i1 true), <4 x i1> [[TMP30]] diff --git a/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll b/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll index f672f649915a4..76b693331be28 100644 --- a/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll +++ b/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll @@ -17,15 +17,14 @@ define i64 @diamond_with_2_early_exits() { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> zeroinitializer, <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = 
select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]] @@ -94,24 +93,23 @@ define i64 @three_early_exits() { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[CHECK_B:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42) -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i8> [[WIDE_LOAD]], splat (i8 -42) ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42) ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] -; CHECK-NEXT: 
[[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> zeroinitializer, <4 x i1> [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]] ; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]] @@ -193,11 +191,9 @@ define i64 @nested_diamond_inner_exits() { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD2]], zeroinitializer @@ -206,9 +202,10 @@ define i64 @nested_diamond_inner_exits() { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> 
[[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> zeroinitializer, <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]] ; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]] @@ -297,14 +294,14 @@ define i64 @chain_of_3_exits() { ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = 
select <4 x i1> [[TMP11]], <4 x i1> splat (i1 true), <4 x i1> [[TMP10]] @@ -383,26 +380,24 @@ define i64 @four_exits_2x2_diamond() { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> zeroinitializer, <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD3]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] -; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> zeroinitializer, <4 x i1> [[TMP12]] +; CHECK-NEXT: [[PREDPHI6:%.*]] = select <4 x i1> [[TMP10]], 
<4 x i1> [[TMP14]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP13]] -; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[PREDPHI5]] +; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> splat (i1 true), <4 x i1> [[PREDPHI6]] ; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x i1> [[TMP18]] ; CHECK-NEXT: [[CMP1A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 @@ -420,7 +415,7 @@ define i64 @four_exits_2x2_diamond() { ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: br i1 [[TMP23]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[LOOP_LATCH:.*]] ; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[PREDPHI5]], i64 [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]] ; CHECK: [[VECTOR_EARLY_EXIT_3]]: ; CHECK-NEXT: br label %[[LOOP_END1]] @@ -577,10 +572,9 @@ define i64 @diamond_exits_overlapping_conditions() { ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], 
[[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> zeroinitializer, <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 3848ac68a07c8..e90d95e451bfa 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -620,6 +620,67 @@ exit: %res = phi i64 [ -1, %entry ], [ -2, %then ], [ 0, %loop.latch ], [ %iv, %loop.header ] ret i64 %res } + +define i64 @same_exit_block_phi_of_consts_iv_next_and() { +; CHECK-LABEL: define i64 @same_exit_block_phi_of_consts_iv_next_and() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY_INTERIM:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; 
CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[VECTOR_BODY_INTERIM]] +; CHECK: vector.body.interim: +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[LOOP_END:%.*]] +; CHECK: vector.early.exit: +; CHECK-NEXT: br label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[VECTOR_EARLY_EXIT]] ], [ 1, [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %index.next.and = and i64 %index.next, 4294967295 + %exitcond = icmp ne i64 %index.next.and, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %loop ], [ 1, %loop.inc ] + ret i64 %retval +} + ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -634,4 +695,5 @@ exit: ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]} ;. 
From 74457c74d7f594164437c26066338704d0416b82 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 9 Jun 2026 18:10:38 +0800 Subject: [PATCH 18/18] Address review comments * Explain when to use repairSSA * repairSSA -> repairSSAImpl --- .../lib/Transforms/Vectorize/VPlanTransforms.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7204450936504..0493cb83ee98b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -4394,9 +4394,10 @@ struct EarlyExitInfo { VPIRBasicBlock *EarlyExitVPBB; VPValue *CondToExit; }; -static VPValue *repairSSA(VPValue *Src, VPBasicBlock *SrcVPBB, VPValue *Other, - VPBasicBlock *VPBB, VPDominatorTree &VPDT, - DenseMap &Phis) { +static VPValue *repairSSAImpl(VPValue *Src, VPBasicBlock *SrcVPBB, + VPValue *Other, VPBasicBlock *VPBB, + VPDominatorTree &VPDT, + DenseMap &Phis) { if (VPDT.dominates(SrcVPBB, VPBB)) return Src; @@ -4407,8 +4408,8 @@ static VPValue *repairSSA(VPValue *Src, VPBasicBlock *SrcVPBB, VPValue *Other, SmallVector InVals; for (auto *Pred : VPBB->predecessors()) - InVals.push_back( - repairSSA(Src, SrcVPBB, Other, cast(Pred), VPDT, Phis)); + InVals.push_back(repairSSAImpl(Src, SrcVPBB, Other, + cast(Pred), VPDT, Phis)); if (all_equal(InVals)) return InVals[0]; @@ -4419,11 +4420,12 @@ static VPValue *repairSSA(VPValue *Src, VPBasicBlock *SrcVPBB, VPValue *Other, /// Insert phi nodes to maintain SSA starting from \p VPBB, such that the /// resulting value is \p \Src on all paths that go through \p SrcVPBB, and \p -/// Other otherwise. +/// Other otherwise. Use if the CFG has been modified such that a def no longer +/// dominates all its uses. 
static VPValue *repairSSA(VPValue *Src, VPBasicBlock *SrcVPBB, VPValue *Other, VPBasicBlock *VPBB, VPDominatorTree &VPDT) { DenseMap Phis; - return repairSSA(Src, SrcVPBB, Other, VPBB, VPDT, Phis); + return repairSSAImpl(Src, SrcVPBB, Other, VPBB, VPDT, Phis); } // After handling early exits, the CondToExits and live outs may no longer be in