[X86][SSE] Merge PerformBLENDICombine into PerformShuffleCombine

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 5 Jan 2016 09:12:17 +0000 (09:12 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 5 Jan 2016 09:12:17 +0000 (09:12 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 5 Jan 2016 09:12:17 +0000 (09:12 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 5 Jan 2016 09:12:17 +0000 (09:12 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 27f08a8cc536aadc016ab308d853d3b663dec4e5..6d748ca51645c1f517120266bf9a549d71bc1bfc 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -23561,6 +23561,31 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
      }
      return SDValue();
    }
      }
      return SDValue();
    }
+  case X86ISD::BLENDI: {
+    SDValue V0 = N->getOperand(0);
+    SDValue V1 = N->getOperand(1);
+    assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() &&
+           "Unexpected input vector types");
+
+    // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
+    // operands and changing the mask to 1. This saves us a bunch of
+    // pattern-matching possibilities related to scalar math ops in SSE/AVX.
+    // X86InstrInfo knows how to commute this back after instruction selection
+    // if it would help register allocation.
+
+    // TODO: If optimizing for size or a processor that doesn't suffer from
+    // partial register update stalls, this should be transformed into a MOVSD
+    // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
+
+    if (VT == MVT::v2f64)
+      if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+        if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
+          SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
+          return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
+        }
+
+    return SDValue();
+  }
    default:
      return SDValue();
    }
    default:
      return SDValue();
    }
@@ -27463,34 +27488,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
  
    return SDValue();
  }
  
-static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
-  SDValue V0 = N->getOperand(0);
-  SDValue V1 = N->getOperand(1);
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-  assert(VT == V0.getValueType() && VT == V1.getValueType() &&
-         "Unexpected input vector types");
-
-  // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
-  // operands and changing the mask to 1. This saves us a bunch of
-  // pattern-matching possibilities related to scalar math ops in SSE/AVX.
-  // x86InstrInfo knows how to commute this back after instruction selection
-  // if it would help register allocation.
-
-  // TODO: If optimizing for size or a processor that doesn't suffer from
-  // partial register update stalls, this should be transformed into a MOVSD
-  // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
-
-  if (VT == MVT::v2f64)
-    if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
-      if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
-        SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
-        return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
-      }
-
-  return SDValue();
-}
-
  static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
    SDLoc DL(N);
    // Gather and Scatter instructions use k-registers for masks. The type of
  static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
    SDLoc DL(N);
    // Gather and Scatter instructions use k-registers for masks. The type of
@@ -27934,6 +27931,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case X86ISD::VZEXT:       return performVZEXTCombine(N, DAG, DCI, Subtarget);
    case X86ISD::SHUFP:       // Handle all target specific shuffles
    case X86ISD::PALIGNR:
    case X86ISD::VZEXT:       return performVZEXTCombine(N, DAG, DCI, Subtarget);
    case X86ISD::SHUFP:       // Handle all target specific shuffles
    case X86ISD::PALIGNR:
+  case X86ISD::BLENDI:
    case X86ISD::UNPCKH:
    case X86ISD::UNPCKL:
    case X86ISD::MOVHLPS:
    case X86ISD::UNPCKH:
    case X86ISD::UNPCKL:
    case X86ISD::MOVHLPS:
@@ -27948,7 +27946,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case X86ISD::VPERM2X128:
    case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
    case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
    case X86ISD::VPERM2X128:
    case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
    case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
-  case X86ISD::BLENDI:    return PerformBLENDICombine(N, DAG);
    case ISD::MGATHER:
    case ISD::MSCATTER:       return PerformGatherScatterCombine(N, DAG);
    }
    case ISD::MGATHER:
    case ISD::MSCATTER:       return PerformGatherScatterCombine(N, DAG);
    }
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 5 Jan 2016 09:12:17 +0000 (09:12 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 5 Jan 2016 09:12:17 +0000 (09:12 +0000)