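Improve code generation for vselect on SSE2: unroll (scalarize) the vector op only when a required operation's action is Expand, so operations that are 'promoted' (bitcast to another handled type) still use the vector AND/OR/XOR lowering.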

author Nadav Rotem <nadav.rotem@intel.com>

Wed, 19 Oct 2011 20:43:16 +0000 (20:43 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Wed, 19 Oct 2011 20:43:16 +0000 (20:43 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Wed, 19 Oct 2011 20:43:16 +0000 (20:43 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Wed, 19 Oct 2011 20:43:16 +0000 (20:43 +0000)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

index 7fe35306bd2839e188f29a4aaa10db4e323be6db..4e02b90aad366d93910d2d73eed08407c19b7f3e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -394,10 +394,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
  
    // If we can't even use the basic vector operations of
    // AND,OR,XOR, we will have to scalarize the op.
-  if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::OR, VT))
-        return DAG.UnrollVectorOp(Op.getNode());
+  // Notice that the operation may be 'promoted' which means that it is
+  // 'bitcasted' to another type which is handled.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
  
    assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
           && "Invalid mask size");
@@ -421,9 +423,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
    DebugLoc DL = Op.getDebugLoc();
  
    // Make sure that the SINT_TO_FP and SRL instructions are available.
-  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
-      return DAG.UnrollVectorOp(Op.getNode());
+  if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
  
   EVT SVT = VT.getScalarType();
    assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll

index 56b099ec42e6aa6f8a04b288d01a18949d50b60e..0007cab0961f1adab2c59039e75dfc81156eca43 100644 (file)
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
-
-
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
  
+; CHECK: vsel_float
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
  define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
    %A = load <4 x float>* %v1
    %B = load <4 x float>* %v2
@@ -11,8 +13,11 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
    ret void
  }
  
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
-
+; CHECK: vsel_i32
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
  define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
    %A = load <4 x i32>* %v1
    %B = load <4 x i32>* %v2
author	Nadav Rotem <nadav.rotem@intel.com>
	Wed, 19 Oct 2011 20:43:16 +0000 (20:43 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Wed, 19 Oct 2011 20:43:16 +0000 (20:43 +0000)
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp		patch | blob | history
test/CodeGen/X86/sse2-blend.ll		patch | blob | history