#include "AMDGPU.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
DominatorTree *DT;
StackVector Stack;
+ LoopInfo *LI;
+
bool isTopOfStack(BasicBlock *BB);
Value *popSaved();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
push(Term->getSuccessor(0), Arg);
-}
-
-/// \brief Close the last opened control flow
+}/// \brief Close the last opened control flow
void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+ llvm::Loop *L = LI->getLoopFor(BB);
+
+ if (L && L->getHeader() == BB) {
+ // We can't insert an EndCF call into a loop header, because it will
+ // get executed on every iteration of the loop, when it should be
+ // executed only once before the loop.
+ SmallVector <BasicBlock*, 8> Latches;
+ L->getLoopLatches(Latches);
+
+ std::vector<BasicBlock*> Preds;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
+ Preds.push_back(*PI);
+ }
+ BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
+ LI, false);
+ }
+
CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
}
/// recognize if/then/else and loops.
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
--- /dev/null
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; This tests that the llvm.SI.end.cf intrinsic is not inserted into the
+; loop block. This intrinsic will be lowered to s_or_b64 by the code
+; generator.
+
+; CHECK-LABEL: {{^}}test:
+
+; This is was lowered from the llvm.SI.end.cf intrinsic:
+; CHECK: s_or_b64 exec, exec
+
+; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
+; CHECK-NOT: s_or_b64 exec, exec
+; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
+define void @test(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %if, label %loop
+
+if:
+ store i32 0, i32 addrspace(1)* %out
+ br label %loop
+
+loop:
+ %tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop]
+ %inc = add i32 %tmp1, %cond
+ %tmp2 = icmp ugt i32 %inc, 10
+ br i1 %tmp2, label %done, label %loop
+
+done:
+ %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
+ store i32 %inc, i32 addrspace(1)* %tmp3
+ ret void
+}