Add a new pass to simplify specific half_powr function calls. This is
authorDan Gohman <gohman@apple.com>
Tue, 4 Nov 2008 23:41:45 +0000 (23:41 +0000)
committerDan Gohman <gohman@apple.com>
Tue, 4 Nov 2008 23:41:45 +0000 (23:41 +0000)
a specialized pass that is not likely to be generally useful.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@58732 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/LinkAllPasses.h
include/llvm/Transforms/Scalar.h
lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp [new file with mode: 0644]
test/Transforms/SimplifyLibCalls/half-powr.ll [new file with mode: 0644]

index 4dbfa2619be9ae6ed31a434f680caa8062065942..0d9ba26aa14711105670b2836da10a08f1076395 100644 (file)
@@ -99,6 +99,7 @@ namespace {
       (void) llvm::createSCCPPass();
       (void) llvm::createScalarReplAggregatesPass();
       (void) llvm::createSimplifyLibCallsPass();
+      (void) llvm::createSimplifyHalfPowrLibCallsPass();
       (void) llvm::createSingleLoopExtractorPass();
       (void) llvm::createStripSymbolsPass();
       (void) llvm::createStripDeadPrototypesPass();
index 52f7967af2e196a6c3b6990ffeac0dfd9359abb6..2c3fdd4a788482a847e42aee9cf3aff312297db4 100644 (file)
@@ -317,6 +317,12 @@ Pass *createLoopDeletionPass();
 /// specific well-known (library) functions.
 FunctionPass *createSimplifyLibCallsPass();
 
+//===----------------------------------------------------------------------===//
+//
+/// createSimplifyHalfPowrLibCallsPass - This is an experimental pass that
+/// optimizes calls to specific half_powr functions.
+FunctionPass *createSimplifyHalfPowrLibCallsPass();
+
 //===----------------------------------------------------------------------===//
 //
 // CodeGenPrepare - This pass prepares a function for instruction selection.
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
new file mode 100644 (file)
index 0000000..530ad03
--- /dev/null
@@ -0,0 +1,159 @@
+//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies an experimental
+// transformation on calls to specific functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls-halfpowr"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace {
+  /// This pass simplifies calls to specific half_powr functions by inlining
+  /// groups of adjacent calls; see runOnFunction and InlineHalfPowrs.
+  class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass {
+    const TargetData *TD; // Assigned at the start of runOnFunction.
+  public:
+    static char ID; // Pass identification
+    SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>(); // runOnFunction fetches this analysis.
+    }
+
+    Instruction *
+    InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+                    Instruction *InsertPt);
+  };
+  char SimplifyHalfPowrLibCalls::ID = 0;
+} // end anonymous namespace.
+
+static RegisterPass<SimplifyHalfPowrLibCalls>
+X("simplify-libcalls-halfpowr", "Simplify half_powr library calls"); // Name matches the opt flag used by the test.
+
+// Public interface: returns a new SimplifyHalfPowrLibCalls pass instance.
+FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
+  return new SimplifyHalfPowrLibCalls(); 
+}
+
+/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
+/// their control flow to better facilitate subsequent optimization.
+Instruction *
+SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
+                                        Instruction *InsertPt) {
+  std::vector<BasicBlock *> Bodies;
+  BasicBlock *NewBlock = 0; // Stays null until at least one call is inlined.
+
+  for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
+    CallInst *Call = cast<CallInst>(HalfPowrs[i]);
+    Function *Callee = Call->getCalledFunction();
+
+    // Minimally sanity-check the CFG of half_powr to ensure that it contains
+    // the kind of code we expect.  If we're running this pass, we have
+    // reason to believe it will be what we expect.
+    Function::iterator I = Callee->begin();
+    BasicBlock *Prologue = I++;
+    if (I == Callee->end()) break; // Callee has only one block.
+    BasicBlock *SubnormalHandling = I++;
+    if (I == Callee->end()) break; // Callee has only two blocks.
+    BasicBlock *Body = I++;
+    if (I != Callee->end()) break; // Callee has more than three blocks.
+    if (SubnormalHandling->getSinglePredecessor() != Prologue)
+      break;
+    BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
+    if (!PBI || !PBI->isConditional()) // Prologue must end in a conditional branch.
+      break;
+    BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
+    if (!SNBI || SNBI->isConditional()) // Must end in an unconditional branch.
+      break;
+    if (!isa<ReturnInst>(Body->getTerminator()))
+      break;
+
+    Instruction *NextInst = next(BasicBlock::iterator(Call));
+
+    // Inline the call, taking care of what code ends up where.
+    NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
+
+    bool B = InlineFunction(Call, 0, TD);
+    assert(B && "half_powr didn't inline?");
+
+    BasicBlock *NewBody = NewBlock->getSinglePredecessor();
+    assert(NewBody);
+    Bodies.push_back(NewBody);
+  }
+
+  if (!NewBlock) // Nothing was inlined; hand back the caller's insertion point.
+    return InsertPt;
+
+  // Put the code for all the bodies into one block, to facilitate
+  // subsequent optimization.
+  (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
+  for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
+    BasicBlock *Body = Bodies[i];
+    Instruction *FNP = Body->getFirstNonPHI();
+    // Splice the insts from body into NewBlock.
+    NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
+                                   FNP, Body->getTerminator());
+  }
+
+  return NewBlock->begin(); // First instruction of the block holding the spliced bodies.
+}
+
+/// runOnFunction - Top level algorithm: collect runs of adjacent
+/// __half_powrf4 calls within each block and inline each run as a group.
+bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
+  TD = &getAnalysis<TargetData>();
+  
+  bool Changed = false;
+  std::vector<Instruction *> HalfPowrs;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // Look for calls.
+      bool IsHalfPowr = false;
+      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        // Look for direct calls to functions with external linkage.
+        Function *Callee = CI->getCalledFunction();
+        if (Callee && Callee->hasExternalLinkage()) {
+          // Look for calls with well-known names.
+          const char *CalleeName = Callee->getNameStart();
+          if (strcmp(CalleeName, "__half_powrf4") == 0)
+            IsHalfPowr = true;
+        }
+      }
+      if (IsHalfPowr)
+        HalfPowrs.push_back(I);
+      // We're looking for sequences of up to three such calls, which we'll
+      // simplify as a group.
+      if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
+        I = InlineHalfPowrs(HalfPowrs, I);
+        E = I->getParent()->end(); // I may now live in a different block; refresh the end iterator.
+        HalfPowrs.clear();
+        Changed = true; // NOTE(review): set even if InlineHalfPowrs bailed out without inlining.
+      }
+    }
+    assert(HalfPowrs.empty() && "Block had no terminator!");
+  }
+
+  return Changed;
+}
diff --git a/test/Transforms/SimplifyLibCalls/half-powr.ll b/test/Transforms/SimplifyLibCalls/half-powr.ll
new file mode 100644 (file)
index 0000000..f4e898c
--- /dev/null
@@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | opt -simplify-libcalls-halfpowr | llvm-dis | %prcontext {mul float} 1 | grep {mul float} | count 8
+
+define float @__half_powrf4(float %f, float %g) nounwind readnone {
+entry:
+       %0 = fcmp olt float %f, 2.000000e+00            ; <i1> [#uses=1]
+       br i1 %0, label %bb, label %bb1
+
+bb:            ; preds = %entry
+       %1 = fdiv float %f, 3.000000e+00                ; <float> [#uses=1]
+       br label %bb1
+
+bb1:           ; preds = %bb, %entry
+       %f_addr.0 = phi float [ %1, %bb ], [ %f, %entry ]               ; <float> [#uses=1]
+       %2 = mul float %f_addr.0, %g            ; <float> [#uses=1]
+       ret float %2
+}
+
+define void @foo(float* %p) nounwind {
+entry:
+       %0 = load float* %p, align 4            ; <float> [#uses=1]
+       %1 = getelementptr float* %p, i32 1             ; <float*> [#uses=1]
+       %2 = load float* %1, align 4            ; <float> [#uses=1]
+       %3 = getelementptr float* %p, i32 2             ; <float*> [#uses=1]
+       %4 = load float* %3, align 4            ; <float> [#uses=1]
+       %5 = getelementptr float* %p, i32 3             ; <float*> [#uses=1]
+       %6 = load float* %5, align 4            ; <float> [#uses=1]
+       %7 = getelementptr float* %p, i32 4             ; <float*> [#uses=1]
+       %8 = load float* %7, align 4            ; <float> [#uses=1]
+       %9 = getelementptr float* %p, i32 5             ; <float*> [#uses=1]
+       %10 = load float* %9, align 4           ; <float> [#uses=1]
+       %11 = tail call float @__half_powrf4(float %0, float %6) nounwind               ; <float> [#uses=1]
+       %12 = tail call float @__half_powrf4(float %2, float %8) nounwind               ; <float> [#uses=1]
+       %13 = tail call float @__half_powrf4(float %4, float %10) nounwind              ; <float> [#uses=1]
+       %14 = getelementptr float* %p, i32 6            ; <float*> [#uses=1]
+       store float %11, float* %14, align 4
+       %15 = getelementptr float* %p, i32 7            ; <float*> [#uses=1]
+       store float %12, float* %15, align 4
+       %16 = getelementptr float* %p, i32 8            ; <float*> [#uses=1]
+       store float %13, float* %16, align 4
+       ret void
+}