After legalization, scalar SETCC has an i32 result type on AArch64.
The i1 requirement seems too conservative, replace it with an assert.
This also means that we now can run after legalization. That should also
be fine, since the ops legalizer runs again after each combine, and
all types created all have the same sizes as the (legal) inputs.
Exposed by r235917; while there, robustize its tests (bsl also uses the
register it defines).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235922
91177308-0d34-0410-b5e6-
96231b3b80d8
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
- if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+ if (N0.getOpcode() != ISD::SETCC)
return SDValue();
+ // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+ // scalar SetCCResultType. We also don't expect vectors, because we assume
+ // that selects fed by vector SETCCs are canonicalized to VSELECT.
+ assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+ "Scalar-SETCC feeding SELECT has unexpected result type!");
+
// If NumMaskElts == 0, the comparison is larger than select result. The
// largest real NEON comparison is 64-bits per lane, which means the result is
// at most 32-bits and an illegal vector. Just bail out for now.
if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
return SDValue();
+ // Make sure we didn't create illegal types, if we're not supposed to.
+ assert(DCI.isBeforeLegalize() ||
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
- return performSelectCombine(N, DAG);
+ return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::STORE:
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
-; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
%cc = fcmp oeq float %c1, %c2
%r = select i1 %cc, <3 x float> %a, <3 x float> %b
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
-; CHECK-NEXT: fcmp d2, d3
-; CHECK-NEXT: movn [[N0:w[0-9]+]], #0
-; CHECK-NEXT: csel [[MASK:w[0-9]+]], [[N0]], wzr, eq
-; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]]
-; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
%cc = fcmp oeq double %c1, %c2
%r = select i1 %cc, <3 x float> %a, <3 x float> %b
ret <1 x double> %res
}
+; For v1i64, it's not clear which of the vector or scalar compare is better.
+; Let's stick to the vector form, like for all other vector selects fed by a
+; scalar setcc. If anything, it exposes more ILP.
define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_3:
-; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
%tst = icmp eq i64 %lhs, %rhs
%evil = insertelement <1 x i1> undef, i1 %tst, i32 0
%res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3