Fixes two issues in SimplifyDemandedBits of sext_in_reg:

author Michael Kuperstein <michael.m.kuperstein@intel.com>

Wed, 18 Feb 2015 09:43:40 +0000 (09:43 +0000)

committer Michael Kuperstein <michael.m.kuperstein@intel.com>

Wed, 18 Feb 2015 09:43:40 +0000 (09:43 +0000)
author Michael Kuperstein <michael.m.kuperstein@intel.com>
Wed, 18 Feb 2015 09:43:40 +0000 (09:43 +0000)
committer Michael Kuperstein <michael.m.kuperstein@intel.com>
Wed, 18 Feb 2015 09:43:40 +0000 (09:43 +0000)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index a1f425eeac114833bda9ec30bcf1ac10602b206e..f2c5a63354e367e451852331a18e3061738a1bec 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
  
      APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
      // If we only care about the highest bit, don't bother shifting right.
-    if (MsbMask == DemandedMask) {
+    if (MsbMask == NewMask) {
        unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
        SDValue InOp = Op.getOperand(0);
-
-      // Compute the correct shift amount type, which must be getShiftAmountTy
-      // for scalar types after legalization.
-      EVT ShiftAmtTy = Op.getValueType();
-      if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
-        ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-
-      SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
-                                            Op.getValueType(), InOp, ShiftAmt));
+      unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+      bool AlreadySignExtended =
+        TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+      // However if the input is already sign extended we expect the sign
+      // extension to be dropped altogether later and do not simplify.
+      if (!AlreadySignExtended) {
+        // Compute the correct shift amount type, which must be getShiftAmountTy
+        // for scalar types after legalization.
+        EVT ShiftAmtTy = Op.getValueType();
+        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+                                                 Op.getValueType(), InOp,
+                                                 ShiftAmt));
+      }
      }
  
      // Sign extension.  Compute the demanded bits in the result that are not
diff --git a/test/CodeGen/X86/vector-blend.ll b/test/CodeGen/X86/vector-blend.ll

index 0a7114b55107f1c2d2276619314d3c484bf742a8..18203de729487503aaa1953e7f4d84d4ff9e8add 100644 (file)
--- a/test/CodeGen/X86/vector-blend.ll
+++ b/test/CodeGen/X86/vector-blend.ll
@@ -419,8 +419,8 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
  ;
  ; SSE41-LABEL: vsel_i648:
  ; SSE41:       # BB#0: # %entry
-; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
  ; SSE41-NEXT:    movaps %xmm5, %xmm1
  ; SSE41-NEXT:    movaps %xmm7, %xmm3
  ; SSE41-NEXT:    retq
diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll

index 0c0f4bbf992aa31ff1d559fc09791e2758aab965..26b00dbd85f53529f0b11f2ffb86eeb6e7938aa0 100644 (file)
--- a/test/CodeGen/X86/vselect-avx.ll
+++ b/test/CodeGen/X86/vselect-avx.ll
@@ -59,19 +59,15 @@ bb:
  ; 
  ; <rdar://problem/18819506>
  
-; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
-; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
-; even a faulty pattern would pass!
-;  
  ; CHECK-LABEL: test3:
-; Compute the original mask.
-;      CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
-; Shrink the bit of the mask.
-; CHECK-NEXT: vpslld   $31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
-; Use the shrunk mask in the blend.
-; CHECK-NEXT:  vblendvps       [[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the original mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}} 
+; Compute the mask.
+;      CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
+; Do not shrink the bit of the mask.
+; CHECK-NOT: vpslld    $31, [[MASK]], {{%xmm[0-9]+}}
+; Use the mask in the blend.
+; CHECK-NEXT:  vblendvps       [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Use the mask in the and.
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}} 
  ; CHECK: retq
  define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17,  <4 x i16> %tmp3, <4 x i16> %tmp12) {
    %tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
author	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Wed, 18 Feb 2015 09:43:40 +0000 (09:43 +0000)
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Wed, 18 Feb 2015 09:43:40 +0000 (09:43 +0000)
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-blend.ll		patch \| blob \| history
test/CodeGen/X86/vselect-avx.ll		patch \| blob \| history