[X86] Remove x86.avx2.psll.dq.bs and x86.avx2.psrl.dq.bs intrinsics.
author Craig Topper <craig.topper@gmail.com>
Mon, 16 Feb 2015 20:51:59 +0000 (20:51 +0000)
committer Craig Topper <craig.topper@gmail.com>
Mon, 16 Feb 2015 20:51:59 +0000 (20:51 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229430 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/IR/AutoUpgrade.cpp
lib/Target/X86/X86InstrSSE.td

index 90ce74c30b5f27d153367cf7b2ea7da0fa4548f2..fef39a61c5b19cf45fa1f12aa36055f53cc8e3c7 100644 (file)
@@ -1586,12 +1586,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_psrl_dq :
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
                          llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx2_psll_dq_bs :
-              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx2_psrl_dq_bs :
-              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
index 0c494ef12e4715ed90bb44a7b53bb28f1f5b5bdc..3c5e469b34da5152ebd4511eda010f375fb68a5e 100644 (file)
@@ -165,6 +165,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name == "x86.avx.vbroadcast.sd.256" ||
         Name == "x86.sse2.psll.dq.bs" ||
         Name == "x86.sse2.psrl.dq.bs" ||
+        Name == "x86.avx2.psll.dq.bs" ||
+        Name == "x86.avx2.psrl.dq.bs" ||
         (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
       NewFn = nullptr;
       return true;
@@ -526,6 +528,52 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = Builder.CreateBitCast(Op1,
                                   VectorType::get(Type::getInt64Ty(C), 2),
                                   "cast");
+    } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
+      Value *Op0 = ConstantVector::getSplat(32, Builder.getInt8(0));
+      Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0),
+                                         VectorType::get(Type::getInt8Ty(C),32),
+                                         "cast");
+
+      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+
+      if (Shift < 16) {
+        SmallVector<Constant*, 32> Idxs;
+        for (unsigned l = 0; l < 32; l += 16)
+          for (unsigned i = 0; i != 16; ++i) {
+            unsigned Idx = i + Shift;
+            if (Idx >= 16) Idx += 16; // end of lane, switch operand.
+            Idxs.push_back(Builder.getInt32(Idx + l));
+          }
+
+        Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+      }
+
+      Rep = Builder.CreateBitCast(Op1,
+                                  VectorType::get(Type::getInt64Ty(C), 4),
+                                  "cast");
+    } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
+      Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0),
+                                         VectorType::get(Type::getInt8Ty(C),32),
+                                         "cast");
+      Value *Op1 = ConstantVector::getSplat(32, Builder.getInt8(0));
+
+      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+
+      if (Shift < 16) {
+        SmallVector<Constant*, 32> Idxs;
+        for (unsigned l = 0; l < 32; l += 16)
+          for (unsigned i = 0; i != 16; ++i) {
+            unsigned Idx = 32 + i - Shift;
+            if (Idx < 32) Idx -= 16; // end of lane, switch operand.
+            Idxs.push_back(Builder.getInt32(Idx + l));
+          }
+
+        Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+      }
+
+      Rep = Builder.CreateBitCast(Op0,
+                                  VectorType::get(Type::getInt64Ty(C), 4),
+                                  "cast");
     } else {
       bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
       if (Name == "llvm.x86.avx.vpermil.pd.256")
index dc54fc5c9ca54671343908feb3c369228785b3ab..9799b61ca25a84d6366aa274e4219723fb889ae8 100644 (file)
@@ -4216,20 +4216,16 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                              VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
                              SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
   // 256-bit logical shifts.
   def VPSLLDQYri : PDIi8<0x73, MRM7r,
                     (outs VR256:$dst), (ins VR256:$src1, i32u8imm:$src2),
                     "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR256:$dst,
-                      (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
-                    VEX_4V, VEX_L;
+                    []>, VEX_4V, VEX_L;
   def VPSRLDQYri : PDIi8<0x73, MRM3r,
                     (outs VR256:$dst), (ins VR256:$src1, i32u8imm:$src2),
                     "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR256:$dst,
-                      (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
-                    VEX_4V, VEX_L;
+                    []>, VEX_4V, VEX_L;
   // PSRADQYri doesn't exist in SSE[1-3].
 }
 } // Predicates = [HasAVX2]