[x86] replace integer logic ops with packed SSE FP logic ops

author Sanjay Patel <spatel@rotateright.com>

Tue, 27 Oct 2015 01:28:07 +0000 (01:28 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 27 Oct 2015 01:28:07 +0000 (01:28 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 27 Oct 2015 01:28:07 +0000 (01:28 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 27 Oct 2015 01:28:07 +0000 (01:28 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 6ffc524cdb8572affde564d114687df887f86d80..bf702e2788ab6b9e2dfd8d0e1b70f4f74c228137 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -23123,7 +23123,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
                       EltNo);
  }
  
                       EltNo);
  }
  
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+                                     const X86Subtarget *Subtarget) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
@@ -23139,6 +23140,29 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
        return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
    }
  
        return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
    }
  
+  // Convert a bitcasted integer logic operation that has one bitcasted
+  // floating-point operand and one constant operand into a floating-point
+  // logic operation. This may create a load of the constant, but that is
+  // cheaper than materializing the constant in an integer register and
+  // transferring it to an SSE register or transferring the SSE operand to
+  // an integer register and back.
+  unsigned FPOpcode;
+  switch (N0.getOpcode()) {
+    case ISD::AND: FPOpcode = X86ISD::FAND; break;
+    case ISD::OR:  FPOpcode = X86ISD::FOR;  break;
+    case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+    default: return SDValue();
+  }
+  if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
+       (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+      isa<ConstantSDNode>(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+      N0.getOperand(0).getOperand(0).getValueType() == VT) {
+    SDValue N000 = N0.getOperand(0).getOperand(0);
+    SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
+    return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+  }
+
    return SDValue();
  }
  
    return SDValue();
  }
  
@@ -26635,7 +26659,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::SELECT:
    case X86ISD::SHRUNKBLEND:
      return PerformSELECTCombine(N, DAG, DCI, Subtarget);
    case ISD::SELECT:
    case X86ISD::SHRUNKBLEND:
      return PerformSELECTCombine(N, DAG, DCI, Subtarget);
-  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG);
+  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG, Subtarget);
    case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
    case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
    case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
    case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
    case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
    case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
diff --git a/test/CodeGen/X86/fp-logic.ll b/test/CodeGen/X86/fp-logic.ll

index 6494d4967c35e791bda007b45bd32282a30f8dab..64c3f6b79a235000577ac372e3f6a771e30f434d 100644 (file)
--- a/test/CodeGen/X86/fp-logic.ll
+++ b/test/CodeGen/X86/fp-logic.ll
@@ -110,9 +110,8 @@ define float @f6(float %x, i32 %y) {
  define float @f7(float %x) {
  ; CHECK-LABEL: f7:
  ; CHECK:       # BB#0:
  define float @f7(float %x) {
  ; CHECK-LABEL: f7:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    andl $3, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    andps %xmm1, %xmm0
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
@@ -126,9 +125,8 @@ define float @f7(float %x) {
  define float @f8(float %x) {
  ; CHECK-LABEL: f8:
  ; CHECK:       # BB#0:
  define float @f8(float %x) {
  ; CHECK-LABEL: f8:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    andl $4, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    andps %xmm1, %xmm0
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
@@ -196,9 +194,8 @@ define float @xor(float %x, float %y) {
  define float @f7_or(float %x) {
  ; CHECK-LABEL: f7_or:
  ; CHECK:       # BB#0:
  define float @f7_or(float %x) {
  ; CHECK-LABEL: f7_or:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    orl $3, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    orps %xmm1, %xmm0
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
@@ -210,9 +207,8 @@ define float @f7_or(float %x) {
  define float @f7_xor(float %x) {
  ; CHECK-LABEL: f7_xor:
  ; CHECK:       # BB#0:
  define float @f7_xor(float %x) {
  ; CHECK-LABEL: f7_xor:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    xorl $3, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    xorps %xmm1, %xmm0
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
@@ -239,9 +235,8 @@ define double @doubles(double %x, double %y) {
  define double @f7_double(double %x) {
  ; CHECK-LABEL: f7_double:
  ; CHECK:       # BB#0:
  define double @f7_double(double %x) {
  ; CHECK-LABEL: f7_double:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %rax
-; CHECK-NEXT:    andl $3, %eax
-; CHECK-NEXT:    movd %rax, %xmm0
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    andpd %xmm1, %xmm0
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast double %x to i64
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast double %x to i64
@@ -257,9 +252,8 @@ define double @f7_double(double %x) {
  define float @movmsk(float %x) {
  ; CHECK-LABEL: movmsk:
  ; CHECK:       # BB#0:
  define float @movmsk(float %x) {
  ; CHECK-LABEL: movmsk:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movmskps %xmm0, %eax
-; CHECK-NEXT:    shll $31, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    andps %xmm1, %xmm0
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
  ; CHECK-NEXT:    retq
  
    %bc1 = bitcast float %x to i32
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 27 Oct 2015 01:28:07 +0000 (01:28 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 27 Oct 2015 01:28:07 +0000 (01:28 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/fp-logic.ll		patch \| blob \| history