From 9bf12b5583104c810cfadcdce91edf9efad79973 Mon Sep 17 00:00:00 2001
From: Evan Cheng <evan.cheng@apple.com>
Date: Tue, 26 Feb 2008 02:42:37 +0000
Subject: [PATCH] Fix PR2076. CodeGenPrepare now sinks address computation for
 inline asm memory operands into inline asm block.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47589 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/CodeGenPrepare.cpp    | 55 +++++++++++++++++++++
 test/CodeGen/X86/2008-02-25-InlineAsmBug.ll | 33 +++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 test/CodeGen/X86/2008-02-25-InlineAsmBug.ll

diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 5c572a640ed..e6f7283ef4e 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/Target/TargetAsmInfo.h"
@@ -28,6 +29,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -58,6 +60,8 @@ namespace {
     bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
                                const Type *AccessTy,
                                DenseMap<Value*,Value*> &SunkAddrs);
+    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+                               DenseMap<Value*,Value*> &SunkAddrs);
     bool OptimizeExtUses(Instruction *I);
   };
 }
@@ -928,6 +932,54 @@ bool CodeGenPrepare::OptimizeLoadStoreInst(Instruction *LdStInst, Value *Addr,
   return true;
 }
 
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeLoadStoreInst to sink their address computation into the block
+/// when possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
+                                           DenseMap<Value*,Value*> &SunkAddrs) {
+  bool MadeChange = false;
+  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  // Do a prepass over the constraints, canonicalizing them, and building up
+  // the ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.
+      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
+    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      if (OpInfo.isIndirect)
+        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    OpInfo.ComputeConstraintToUse(*TLI);
+
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+      Value *OpVal = OpInfo.CallOperandVal;
+      MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
+                                          SunkAddrs);
+    }
+  }
+
+  return MadeChange;
+}
+
 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   BasicBlock *DefBB = I->getParent();
 
@@ -1076,6 +1128,9 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
             TLI->getTargetMachine().getTargetAsmInfo()) {
           if (TAI->ExpandInlineAsm(CI))
             BBI = BB.begin();
+          else
+            // Sink address computation for memory operands into the block.
+            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
         }
       }
     }
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
new file mode 100644
index 00000000000..ff7cf5e94e2
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu -mattr=+sse2
+; PR2076
+
+define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind {
+entry:
+	%tmp164 = getelementptr [16 x i32]* null, i32 0, i32 11		; <i32*> [#uses=1]
+	%tmp169 = getelementptr [16 x i32]* null, i32 0, i32 13		; <i32*> [#uses=1]
+	%tmp174 = getelementptr [16 x i32]* null, i32 0, i32 15		; <i32*> [#uses=1]
+	%tmp154.sum317 = add i32 0, %stride		; <i32> [#uses=1]
+	%tmp154.sum315 = mul i32 %stride, 6		; <i32> [#uses=1]
+	%tmp154.sum = mul i32 %stride, 7		; <i32> [#uses=1]
+	%pix_addr.0327.rec = mul i32 0, 0		; <i32> [#uses=4]
+	br i1 false, label %bb292, label %bb32
+
+bb32:		; preds = %entry
+	%pix_addr.0327.sum340 = add i32 %pix_addr.0327.rec, 0		; <i32> [#uses=1]
+	%tmp154 = getelementptr i8* %pix, i32 %pix_addr.0327.sum340		; <i8*> [#uses=1]
+	%tmp177178 = bitcast i8* %tmp154 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum339 = add i32 %pix_addr.0327.rec, %tmp154.sum317		; <i32> [#uses=1]
+	%tmp181 = getelementptr i8* %pix, i32 %pix_addr.0327.sum339		; <i8*> [#uses=1]
+	%tmp181182 = bitcast i8* %tmp181 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum338 = add i32 %pix_addr.0327.rec, %tmp154.sum315		; <i32> [#uses=1]
+	%tmp186 = getelementptr i8* %pix, i32 %pix_addr.0327.sum338		; <i8*> [#uses=1]
+	%tmp186187 = bitcast i8* %tmp186 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum		; <i32> [#uses=1]
+	%tmp191 = getelementptr i8* %pix, i32 %pix_addr.0327.sum337		; <i8*> [#uses=1]
+	%tmp191192 = bitcast i8* %tmp191 to i32*		; <i32*> [#uses=1]
+	call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind
+	unreachable
+
+bb292:		; preds = %entry
+	ret void
+}
-- 
2.34.1
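
For context, a minimal sketch (not part of the commit) of the transformation this enables, on a hypothetical function @sink_example written in the same pre-2.3 IR syntax as the test above. The "sunkaddr" names follow the ptrtoint/add/inttoptr sequence OptimizeLoadStoreInst emits for sunk addresses; the exact output can differ by target and addressing mode.

; Before: %addr is defined in %entry but only used by the "*m" (indirect
; memory) operand of the inline asm in %bb, so it stays live across the branch.
define void @sink_example(i8* %base, i32 %off, i1 %cond) nounwind {
entry:
	%addr = getelementptr i8* %base, i32 %off
	br i1 %cond, label %bb, label %done

bb:
	call void asm sideeffect "movl $0, %eax", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"( i8* %addr )
	ret void

done:
	ret void
}

; After CodeGenPrepare (roughly): the address computation is re-materialized
; next to the call, so instruction selection can fold it into the asm's
; memory operand.
bb:
	%sunkaddr = ptrtoint i8* %base to i32
	%sunkaddr1 = add i32 %sunkaddr, %off
	%sunkaddr2 = inttoptr i32 %sunkaddr1 to i8*
	call void asm sideeffect "movl $0, %eax", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"( i8* %sunkaddr2 )
	ret void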