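This commit contains a few changes that had to go in together:
1. DAGCombiner: simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op(A, B)); SCALAR_TO_VECTOR operands are handled the same way.
2. DAGCombiner: simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op(A, B)) when both shuffles are single-vector swizzles that use the same mask.
3. DAGCombiner: fold shuffle(shuffle(x, y), undef) -> shuffle(x, y).
4. X86: PerformOrCombine now looks through bitcasts on X, Y and Mask when they are present instead of requiring all three to be bitcasts.
The affected ARM, CellSPU and X86 codegen tests are updated, and a new test (test/CodeGen/X86/SwizzleShuff.ll) is added.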

author Nadav Rotem <nadav.rotem@intel.com>

Sun, 1 Apr 2012 19:31:22 +0000 (19:31 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Sun, 1 Apr 2012 19:31:22 +0000 (19:31 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Sun, 1 Apr 2012 19:31:22 +0000 (19:31 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Sun, 1 Apr 2012 19:31:22 +0000 (19:31 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index bac644a42a8ac25f0c0bbb521bf8327060ddfce6..5e88fcbb0e10fffa90614a8dfd1c49962dcfbc18 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2336,6 +2336,68 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
                         ORNode, N0.getOperand(1));
    }
  
+  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+  // Only perform this optimization after type legalization and before
+  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+  // we don't want to undo this promotion.
+  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+  // on scalars.
+  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+      && Level == AfterLegalizeVectorOps) {
+    SDValue In0 = N0.getOperand(0);
+    SDValue In1 = N1.getOperand(0);
+    EVT In0Ty = In0.getValueType();
+    EVT In1Ty = In1.getValueType();
+    // If both incoming values are integers, and the original types are the same.
+    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+      AddToWorkList(Op.getNode());
+      return BC;
+    }
+  }
+
+  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+  // If both shuffles use the same mask, and both shuffle within a single
+  // vector, then it is worthwhile to move the swizzle after the operation.
+  // The type-legalizer generates this pattern when loading illegal
+  // vector types from memory. In many cases this allows additional shuffle
+  // optimizations.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+    SDValue In0 = SVN0->getOperand(0);
+    SDValue In1 = SVN1->getOperand(0);
+    EVT In0Ty = In0.getValueType();
+    EVT In1Ty = In1.getValueType();
+
+    unsigned NumElts = VT.getVectorNumElements();
+    // Check that both shuffles are swizzles.
+    bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+                           N1.getOperand(1).getOpcode() == ISD::UNDEF);
+
+    // Check that both shuffles use the same mask. The masks are known to be of
+    // the same length because the result vector type is the same.
+    bool SameMask = true;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx0 = SVN0->getMaskElt(i);
+      int Idx1 = SVN1->getMaskElt(i);
+      if (Idx0 != Idx1) {
+        SameMask = false;
+        break;
+      }
+    }
+
+    if (SameMask && SingleVecShuff && In0Ty == In1Ty) {
+      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1);
+      SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+                                          DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+      AddToWorkList(Op.getNode());
+      return Shuff;
+    }
+  }
    return SDValue();
  }
  
@@ -7721,6 +7783,36 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
          return N0;
      }
    }
+
+  // If this shuffle node is simply a swizzle of another shuffle node,
+  // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+      N1.getOpcode() == ISD::UNDEF) {
+
+    SmallVector<int, 8> NewMask;
+    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+    EVT InVT = N0.getValueType();
+    int InNumElts = InVT.getVectorNumElements();
+
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      // If we access the second (undef) operand then this index can be
+      // canonicalized to undef as well.
+      if (Idx >= InNumElts)
+        Idx = -1;
+      // Next, this index comes from the first value, which is the incoming
+      // shuffle. Adopt the incoming index.
+      if (Idx >= 0)
+        Idx = OtherSV->getMaskElt(Idx);
+
+      NewMask.push_back(Idx);
+    }
+
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
+                                OtherSV->getOperand(1), &NewMask[0]);
+  }
+
    return SDValue();
  }
  
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 88f38298db05853805fa5d8310022cca098ac055..69a60361314aea849df7abfa98d65b040f6e2982 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -14000,13 +14000,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
          return SDValue();
  
        // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
-      if (Mask.getOpcode() != ISD::BITCAST ||
-          X.getOpcode() != ISD::BITCAST ||
-          Y.getOpcode() != ISD::BITCAST)
-        return SDValue();
-
        // Look through mask bitcast.
-      Mask = Mask.getOperand(0);
+      if (Mask.getOpcode() == ISD::BITCAST)
+        Mask = Mask.getOperand(0);
+      if (X.getOpcode() == ISD::BITCAST)
+        X = X.getOperand(0);
+      if (Y.getOpcode() == ISD::BITCAST)
+        Y = Y.getOperand(0);
+
        EVT MaskVT = Mask.getValueType();
  
        // Validate that the Mask operand is a vector sra node.
@@ -14027,8 +14028,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
        // Now we know we at least have a plendvb with the mask val.  See if
        // we can form a psignb/w/d.
        // psign = x.type == y.type == mask.type && y = sub(0, x);
-      X = X.getOperand(0);
-      Y = Y.getOperand(0);
        if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
            ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
            X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll

index 78b4e7ea84ca3c6e7850000c71bc19697c46363e..05794e4ebddb19c78799d13177ff0bf588788a39 100644 (file)
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -273,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
  entry:
  ; CHECK: t10:
  ; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
-; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
  ; CHECK: vadd.f32 q8, q8, q8
    %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
    %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll

index 977093527609f8499a1c66ec45bdd29dddbb74b9..8b7af20b4a98332cfe0d3a11682cd5bfa20a6845 100644 (file)
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot          %t1.s | count 86
+; RUN: grep rot          %t1.s | count 85
  ; RUN: grep roth         %t1.s | count 8
  ; RUN: grep roti.*5      %t1.s | count 1
  ; RUN: grep roti.*27     %t1.s | count 1
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll

index 016e02c3d5d71290d4dca7287e703f974912e54e..1712f3456539287451efa328f9cf9e10bdbb4c13 100644 (file)
--- a/test/CodeGen/X86/2011-10-27-tstore.ll
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -3,14 +3,14 @@
  target triple = "x86_64-unknown-linux-gnu"
  
  ;CHECK: ltstore
-;CHECK: pshufd
-;CHECK: pshufd
-;CHECK: ret
-define void @ltstore() {
+;CHECK: movq
+;CHECK-NEXT: movq
+;CHECK-NEXT: ret
+define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) {
  entry:
-  %in = load <4 x i32>* undef
+  %in = load <4 x i32>* %pIn
    %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  store <2 x i32> %j, <2 x i32>* undef
+  store <2 x i32> %j, <2 x i32>* %pOut
    ret void
  }
  
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll

new file mode 100644 (file)

index 0000000..11b702e
--- /dev/null
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; Check that we perform a scalar XOR on i32.
+
+; CHECK: pull_bitcast
+; CHECK: xorl
+; CHECK: ret
+define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
+  %A = load <4 x i8>* %pA
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll

index 060839893820fd03eadb3902e31fc2d5f4e08043..65486cb80c9f14f276643196614a780009b83dc5 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -27,11 +27,11 @@ entry:
  define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
  entry:
  ; CHECK: t02
-; CHECK: movaps
-; CHECK: shufps
-; CHECK: pshufd
-; CHECK: movq
-; CHECK: ret
+; CHECK: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: ret
    %0 = bitcast <8 x i32>* %source to <4 x i32>*
    %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
    %tmp2 = load <4 x i32>* %arrayidx, align 16
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll

index 7bebb274f6ec05e4d3b267c76263f5ac25bc2469..94200537168c4e8e5fb4c1657932b1445e1d402a 100644 (file)
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -33,7 +33,7 @@ entry:
  define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
  entry:
  ; CHECK: shuf3:
-; CHECK: shufps
+; CHECK: shufd
    %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
    %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
author	Nadav Rotem <nadav.rotem@intel.com>
	Sun, 1 Apr 2012 19:31:22 +0000 (19:31 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Sun, 1 Apr 2012 19:31:22 +0000 (19:31 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/ARM/reg_sequence.ll		patch | blob | history
test/CodeGen/CellSPU/rotate_ops.ll		patch | blob | history
test/CodeGen/X86/2011-10-27-tstore.ll		patch | blob | history
test/CodeGen/X86/SwizzleShuff.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/X86/vec_shuffle-37.ll		patch | blob | history
test/CodeGen/X86/widen_shuffle-1.ll		patch | blob | history