[X86] Always prefer to lower a VECTOR_SHUFFLE into a BLENDI instead of SHUFP (or VPERM2X128).

author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Wed, 25 Jun 2014 17:41:58 +0000 (17:41 +0000)

committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Wed, 25 Jun 2014 17:41:58 +0000 (17:41 +0000)
author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Wed, 25 Jun 2014 17:41:58 +0000 (17:41 +0000)
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Wed, 25 Jun 2014 17:41:58 +0000 (17:41 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 989e6f1ee81d58f801dd9004fc802de2d56229b3..cde413f4551c63584c25b4a727aaadf0685722dc 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8337,6 +8337,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
                                  getShufflePSHUFLWImmediate(SVOp),
                                  DAG);
  
+  unsigned MaskValue;
+  if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
+                  &MaskValue))
+    return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
+
    if (isSHUFPMask(M, VT))
      return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
                                  getShuffleSHUFImmediate(SVOp), DAG);
@@ -8374,11 +8379,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
      return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
                                  V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
  
-  unsigned MaskValue;
-  if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
-                  &MaskValue))
-    return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
-
    if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
      return getINSERTPS(SVOp, dl, DAG);
  
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 988a0598975410e926de5f4ef3889edd2ddde8d9..e6ca519b7d5db187f616109bbdf45893404f7b0a 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5374,8 +5374,8 @@ let Predicates = [HasAVX] in {
    // - the 1st and 3rd element from the first input vector (the 'fsub' node);
    // - the 2nd and 4th element from the second input vector (the 'fadd' node).
  
-  def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
-                             (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
+  def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
+                             (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
              (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
    def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
                                (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll

index 43cdf7edf70a23039515fff0c067f9d1f9477bf0..d2a22d7094741eb1742a83beaeb42d2d89ac7e7e 100644 (file)
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -110,7 +110,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
  
  ;CHECK-LABEL: vsel_double4:
  ;CHECK-NOT: vinsertf128
-;CHECK: vshufpd $10
+;CHECK: vblendpd $10
  ;CHECK-NEXT: ret
  define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
    %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll

index f3f7e554a33bb9b59ea0f6c32c43e0f9628fe03d..4a996d79815c1dc0b0f231ff0079899b06bb1a73 100644 (file)
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -25,7 +25,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
    ret <4 x i64> %c
  ; CHECK-LABEL: test3:
-; CHECK: vperm2f128
+; CHECK: vblendpd
  ; CHECK: ret
  }
  
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll

index caa21e5bacfed285d76794af8ef4defa9892b8c7..c20775bacad288750f5ac4aa517b26fbad7ea7bd 100644 (file)
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -9,7 +9,7 @@ entry:
  }
  
  ; CHECK: _B
-; CHECK: vperm2f128 $48
+; CHECK: vblendps $240
  define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll

index 45883b717380d6222259d7edf49d0537341893b2..ad3dbc1ed89385213237d39e168205c82f38bf9b 100644 (file)
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -32,14 +32,14 @@ entry:
    ret <8 x i32> %shuffle
  }
  
-; CHECK: vshufpd  $10, %ymm
+; CHECK: vblendpd  $10, %ymm
  define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    ret <4 x double> %shuffle
  }
  
-; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+; CHECK: vblendpd  $10, (%{{.*}}), %ymm
  define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
  entry:
    %a2 = load <4 x double>* %a
@@ -48,14 +48,14 @@ entry:
    ret <4 x double> %shuffle
  }
  
-; CHECK: vshufpd  $10, %ymm
+; CHECK: vblendpd  $10, %ymm
  define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    ret <4 x i64> %shuffle
  }
  
-; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+; CHECK: vblendpd  $10, (%{{.*}}), %ymm
  define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
  entry:
    %a2 = load <4 x i64>* %a
@@ -71,7 +71,7 @@ entry:
    ret <8 x float> %shuffle
  }
  
-; CHECK: vshufpd  $2, %ymm
+; CHECK: vblendpd  $2, %ymm
  define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
  entry:
    %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll

index 572aded5e9a3ff5e3acab6e23bd72840b33175b7..ff807b98717d34830c508374507e736297f4ddb0 100644 (file)
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -74,7 +74,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
  }
  ; CHECK-LABEL: test6
  ; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: blendps $12
  ; CHECK-NEXT: ret
  
  
@@ -86,7 +86,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
  }
  ; CHECK-LABEL: test7
  ; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: blendps $12
  ; CHECK-NEXT: ret
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Wed, 25 Jun 2014 17:41:58 +0000 (17:41 +0000)
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Wed, 25 Jun 2014 17:41:58 +0000 (17:41 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
test/CodeGen/X86/avx-blend.ll		patch \| blob \| history
test/CodeGen/X86/avx-shuffle.ll		patch \| blob \| history
test/CodeGen/X86/avx-vperm2f128.ll		patch \| blob \| history
test/CodeGen/X86/avx-vshufp.ll		patch \| blob \| history
test/CodeGen/X86/combine-or.ll		patch \| blob \| history