Eliminate the x86.sse2.movs.d, x86.sse2.shuf.pd, x86.sse2.unpckh.pd, and x86.sse2.unpckl.pd intrinsics; calls to them are auto-upgraded to shufflevector.
author Evan Cheng <evan.cheng@apple.com>
Sat, 24 May 2008 02:14:05 +0000 (02:14 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Sat, 24 May 2008 02:14:05 +0000 (02:14 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51531 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IntrinsicsX86.td
lib/Target/X86/X86InstrSSE.td
lib/VMCore/AutoUpgrade.cpp
test/Bitcode/sse2_movs_d.ll [new file with mode: 0644]
test/Bitcode/sse2_movs_d.ll.bc [new file with mode: 0644]
test/Bitcode/sse2_shuf_pd.ll [new file with mode: 0644]
test/Bitcode/sse2_shuf_pd.ll.bc [new file with mode: 0644]
test/Bitcode/sse2_unpck_pd.ll [new file with mode: 0644]
test/Bitcode/sse2_unpck_pd.ll.bc [new file with mode: 0644]

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 8af50c152fb25adb5e1a84724d6748820345e51b..47c8e197170fa4c87990ebbfd6c815daaed3db78 100644 (file)
@@ -504,18 +504,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Shuffles.
 // FIXME: Temporary workarounds since 2-wide shuffle is broken.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_movs_d : GCCBuiltin<"__builtin_ia32_movsd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_shuf_pd : GCCBuiltin<"__builtin_ia32_shufpd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_unpckh_pd : GCCBuiltin<"__builtin_ia32_unpckhpd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_unpckl_pd : GCCBuiltin<"__builtin_ia32_unpcklpd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_punpckh_qdq : GCCBuiltin<"__builtin_ia32_punpckhqdq128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_v2i64_ty], [IntrNoMem]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6f4eede2ecfbb9111e21e63ccb97a1f5ea2dea1c..8e6d9df3827a1df469e93cde78f4bb4e5cd98303 100644 (file)
@@ -3051,22 +3051,6 @@ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 
 // FIXME: Temporary workaround since 2-wide shuffle is broken.
-def : Pat<(int_x86_sse2_movs_d  VR128:$src1, VR128:$src2),
-          (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
-          (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
-      Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (memop addr:$src2),imm:$src3),
-          (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
-      Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
-          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (memop addr:$src2)),
-          (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
-          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (memop addr:$src2)),
-          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
 def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
           (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
 def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (memop addr:$src2)),
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 381463906237652f9bdd0323039e04c07a326895..0d6ae43d0f0277708dc93f29bf0f8637d0d0db26 100644 (file)
@@ -150,7 +150,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       return true;
     } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
                Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
-               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0) {
+               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
+               Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
+               Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
+               Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
+               Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ) {
       // Calls to these intrinsics are transformed into ShuffleVector's.
       NewFn = 0;
       return true;
@@ -187,17 +191,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
 
   if (!NewFn) {
     bool isLoadH = false, isLoadL = false, isMovL = false;
+    bool isMovSD = false, isShufPD = false;
+    bool isUnpckhPD = false, isUnpcklPD = false;
     if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadh.pd") == 0)
       isLoadH = true;
     else if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadl.pd") == 0)
       isLoadL = true;
     else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movl.dq") == 0)
       isMovL = true;
-
-    if (isLoadH || isLoadL || isMovL) {
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movs.d") == 0)
+      isMovSD = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.shuf.pd") == 0)
+      isShufPD = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckh.pd") == 0)
+      isUnpckhPD = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckl.pd") == 0)
+      isUnpcklPD = true;
+
+    if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
+        isUnpckhPD || isUnpcklPD) {
       std::vector<Constant*> Idxs;
       Value *Op0 = CI->getOperand(1);
-      ShuffleVectorInst *SI;
+      ShuffleVectorInst *SI = NULL;
       if (isLoadH || isLoadL) {
         Value *Op1 = UndefValue::get(Op0->getType());
         Value *Addr = new BitCastInst(CI->getOperand(2), 
@@ -216,7 +231,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         }
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else {
+      } else if (isMovL) {
         Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
         Idxs.push_back(Zero);
         Idxs.push_back(Zero);
@@ -231,8 +246,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
+      } else if (isMovSD || isUnpckhPD || isUnpcklPD) {
+        Value *Op1 = CI->getOperand(2);
+        if (isMovSD) {
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+        } else if (isUnpckhPD) {
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
+        } else {
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+        }
+        Value *Mask = ConstantVector::get(Idxs);
+        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
+      } else if (isShufPD) {
+        Value *Op1 = CI->getOperand(2);
+        unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+        Idxs.push_back(ConstantInt::get(Type::Int32Ty, MaskVal & 1));
+        Idxs.push_back(ConstantInt::get(Type::Int32Ty, ((MaskVal >> 1) & 1)+2));
+        Value *Mask = ConstantVector::get(Idxs);
+        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
       }
 
+      assert(SI && "Unexpected!");
+
       // Handle any uses of the old CallInst.
       if (!CI->use_empty())
         //  Replace all uses of the old call with the new cast which has the 
diff --git a/test/Bitcode/sse2_movs_d.ll b/test/Bitcode/sse2_movs_d.ll
new file mode 100644 (file)
index 0000000..25a35b6
--- /dev/null
@@ -0,0 +1,2 @@
+; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.movs.d}
+; RUN: llvm-dis < %s.bc | grep shufflevector
diff --git a/test/Bitcode/sse2_movs_d.ll.bc b/test/Bitcode/sse2_movs_d.ll.bc
new file mode 100644 (file)
index 0000000..719d529
Binary files /dev/null and b/test/Bitcode/sse2_movs_d.ll.bc differ
diff --git a/test/Bitcode/sse2_shuf_pd.ll b/test/Bitcode/sse2_shuf_pd.ll
new file mode 100644 (file)
index 0000000..5829edb
--- /dev/null
@@ -0,0 +1,2 @@
+; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.shuf.pd}
+; RUN: llvm-dis < %s.bc | grep shufflevector
diff --git a/test/Bitcode/sse2_shuf_pd.ll.bc b/test/Bitcode/sse2_shuf_pd.ll.bc
new file mode 100644 (file)
index 0000000..832c39e
Binary files /dev/null and b/test/Bitcode/sse2_shuf_pd.ll.bc differ
diff --git a/test/Bitcode/sse2_unpck_pd.ll b/test/Bitcode/sse2_unpck_pd.ll
new file mode 100644 (file)
index 0000000..f4e5d54
--- /dev/null
@@ -0,0 +1,3 @@
+; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.unpckh.pd}
+; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.unpckl.pd}
+; RUN: llvm-dis < %s.bc | grep shufflevector
diff --git a/test/Bitcode/sse2_unpck_pd.ll.bc b/test/Bitcode/sse2_unpck_pd.ll.bc
new file mode 100644 (file)
index 0000000..4fb829c
Binary files /dev/null and b/test/Bitcode/sse2_unpck_pd.ll.bc differ