[AArch64] Also combine vector selects fed by non-i1 SETCCs.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Mon, 27 Apr 2015 21:43:12 +0000 (21:43 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Mon, 27 Apr 2015 21:43:12 +0000 (21:43 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Mon, 27 Apr 2015 21:43:12 +0000 (21:43 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Mon, 27 Apr 2015 21:43:12 +0000 (21:43 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index e66b07ea2f44cabec1bcc0fa93c873b70b2c03a8..cc60cc4592fd071081237652f1f24e7585995d7b 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8644,13 +8644,21 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
  /// the compare-mask instructions rather than going via NZCV, even if LHS and
  /// RHS are really scalar. This replaces any scalar setcc in the above pattern
  /// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
    SDValue N0 = N->getOperand(0);
    EVT ResVT = N->getValueType(0);
  
-  if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+  if (N0.getOpcode() != ISD::SETCC)
      return SDValue();
  
+  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+  // scalar SetCCResultType. We also don't expect vectors, because we assume
+  // that selects fed by vector SETCCs are canonicalized to VSELECT.
+  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+         "Scalar-SETCC feeding SELECT has unexpected result type!");
+
    // If NumMaskElts == 0, the comparison is larger than select result. The
    // largest real NEON comparison is 64-bits per lane, which means the result is
    // at most 32-bits and an illegal vector. Just bail out for now.
@@ -8674,6 +8682,10 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
    if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
      return SDValue();
  
+  // Make sure we didn't create illegal types, if we're not supposed to.
+  assert(DCI.isBeforeLegalize() ||
+         DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
    // First perform a vector comparison, where lane 0 is the one we're interested
    // in.
    SDLoc DL(N0);
@@ -8721,7 +8733,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::CONCAT_VECTORS:
      return performConcatVectorsCombine(N, DCI, DAG);
    case ISD::SELECT:
-    return performSelectCombine(N, DAG);
+    return performSelectCombine(N, DCI);
    case ISD::VSELECT:
      return performVSelectCombine(N, DCI.DAG);
    case ISD::STORE:
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll

index b98d2d9219b918a9b2c911016c8a3cea77cb3c33..fe765f4ef984eb94f05d8f043624b73dc358cf47 100644 (file)
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -225,9 +225,9 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
  define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
  ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
  ; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
-; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
  ; CHECK-NEXT: ret
    %cc = fcmp oeq float %c1, %c2
    %r = select i1 %cc, <3 x float> %a, <3 x float> %b
@@ -236,12 +236,10 @@ define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b
  
  define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
  ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
-; CHECK-NEXT: fcmp d2, d3
-; CHECK-NEXT: movn [[N0:w[0-9]+]], #0
-; CHECK-NEXT: csel [[MASK:w[0-9]+]], [[N0]], wzr, eq
-; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]]
-; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
  ; CHECK-NEXT: ret
    %cc = fcmp oeq double %c1, %c2
    %r = select i1 %cc, <3 x float> %a, <3 x float> %b
diff --git a/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll

index 74e3af8206f59fa2365f822dc14710ae40944af7..c739e9dcd906d6efa8a95828ed6a9d89bb077763 100644 (file)
--- a/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -50,9 +50,15 @@ define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x doubl
    ret <1 x double> %res
  }
  
+; For v1i64, it's not clear whether the vector or the scalar compare is better.
+; Let's stick to the vector form, as for all other vector selects fed by a
+; scalar setcc.  If anything, it exposes more ILP.
  define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
  ; CHECK-LABEL: test_select_v1i1_3:
-; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
    %tst = icmp eq i64 %lhs, %rhs
    %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
    %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Mon, 27 Apr 2015 21:43:12 +0000 (21:43 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Mon, 27 Apr 2015 21:43:12 +0000 (21:43 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
test/CodeGen/AArch64/arm64-neon-select_cc.ll		patch \| blob \| history
test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll		patch \| blob \| history