ArrayRef<int> Mask) const;
bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- SDLoc dl);
+ bool &NeedInvert, SDLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
/// condition code CC on the current target.
+///
/// If the SETCC has been legalized using AND / OR, then the legalized node
-/// will be stored in LHS. RHS and CC will be set to SDValue().
+/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+/// will be set to false.
+///
/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
-/// then the values of LHS and RHS will be swapped and CC will be set to the
-/// new condition.
+/// then the values of LHS and RHS will be swapped, CC will be set to the
+/// new condition, and NeedInvert will be set to false.
+///
+/// If the SETCC has been legalized using the inverse condcode, then LHS and
+/// RHS will be unchanged, CC will be set to the inverted condcode, and
+/// NeedInvert will be set to true. The caller must invert the result of the
+/// SETCC with SelectionDAG::getNOT() or take equivalent action to swap the
+/// effect of a true/false result.
+///
/// \returns true if the SetCC has been legalized, false if it hasn't.
bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC,
+ bool &NeedInvert,
SDLoc dl) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
- case ISD::SETNE:
- case ISD::SETEQ:
// We only support using the inverted operation, which is computed above
// and not a different manner of supporting expanding these cases.
llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // Try inverting the result of the inverse condition.
+ InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ return true;
+ }
+ // If inverting the condition didn't work then we have no means to expand
+ // the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
Tmp2 = Node->getOperand(1);
Tmp3 = Node->getOperand(2);
bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
- Tmp3, dl);
+ Tmp3, NeedInvert, dl);
if (Legalized) {
- // If we exapanded the SETCC by swapping LHS and RHS, create a new SETCC
- // node.
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
if (Tmp3.getNode())
Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
Tmp1, Tmp2, Tmp3);
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0));
+
Results.push_back(Tmp1);
break;
}
if (!Legalized) {
Legalized = LegalizeSetCCCondCode(
- getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
+ getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+ dl);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
- // If we exapanded the SETCC by swapping LHS and RHS, create a new
- // SELECT_CC node.
+
+ // If we expanded the SETCC by inverting the condition code, then swap
+ // the True/False operands to match.
+ if (NeedInvert)
+ std::swap(Tmp3, Tmp4);
+
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SELECT_CC node.
if (CC.getNode()) {
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
Tmp1, Tmp2, Tmp3, Tmp4, CC);
Tmp4 = Node->getOperand(1); // CC
bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
- Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl);
+ Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
- // If we exapanded the SETCC by swapping LHS and RHS, create a new BR_CC
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+
+ // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
// node.
if (Tmp4.getNode()) {
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
; CHECK: .size clt_u_v2i64
}
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK: cne_v16i8:
+ %1 = load <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = icmp ne <16 x i8> %1, %2
+ %4 = sext <16 x i1> %3 to <16 x i8>
+ ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: xori.b [[R3]], [[R3]], 255
+ store <16 x i8> %4, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+ ; CHECK: .size cne_v16i8
+}
+
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK: cne_v8i16:
+
+ %1 = load <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = icmp ne <8 x i16> %1, %2
+ %4 = sext <8 x i1> %3 to <8 x i16>
+ ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
+ ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+ ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+ store <8 x i16> %4, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+ ; CHECK: .size cne_v8i16
+}
+
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK: cne_v4i32:
+
+ %1 = load <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = icmp ne <4 x i32> %1, %2
+ %4 = sext <4 x i1> %3 to <4 x i32>
+ ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
+ ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+ ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+ store <4 x i32> %4, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+ ; CHECK: .size cne_v4i32
+}
+
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK: cne_v2i64:
+
+ %1 = load <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = icmp ne <2 x i64> %1, %2
+ %4 = sext <2 x i1> %3 to <2 x i64>
+ ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
+ ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+ ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+ store <2 x i64> %4, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+ ; CHECK: .size cne_v2i64
+}
+
define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: ceqi_v16i8:
--- /dev/null
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Don't know how to expand this condition!" unreachable.
+; It should at least successfully build.
+
+define void @autogen_SD3861334421(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+ %A4 = alloca <2 x i32>
+ %A3 = alloca <2 x double>
+ %A2 = alloca i64
+ %A1 = alloca i64
+ %A = alloca double
+ %L = load i8* %0
+ store i8 -101, i8* %0
+ %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
+ %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1>
+ %I = insertelement <8 x i64> zeroinitializer, i64 %4, i32 5
+ %B = and i64 116376, 57247
+ %FC = uitofp i8 7 to double
+ %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %L5 = load i8* %0
+ store i8 %L, i8* %0
+ %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3
+ %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
+ %I8 = insertelement <8 x i8> %Sl, i8 7, i32 4
+ %B9 = or <8 x i64> zeroinitializer, zeroinitializer
+ %Sl10 = select i1 false, i64 116376, i64 380809
+ %Cmp = icmp sgt i32 394647, 17081
+ br label %CF
+
+CF: ; preds = %CF, %BB
+ %L11 = load i8* %0
+ store i8 -87, i8* %0
+ %E12 = extractelement <4 x i64> zeroinitializer, i32 0
+ %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5>
+ %I14 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 1
+ %B15 = srem i64 %Sl10, 380809
+ %FC16 = sitofp i64 57247 to float
+ %Sl17 = select i1 false, double 0x87A9374869A78EC6, double 0.000000e+00
+ %Cmp18 = icmp uge i8 %L, %5
+ br i1 %Cmp18, label %CF, label %CF80
+
+CF80: ; preds = %CF80, %CF88, %CF
+ %L19 = load i8* %0
+ store i8 -101, i8* %0
+ %E20 = extractelement <4 x i64> zeroinitializer, i32 0
+ %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+ %I22 = insertelement <4 x i64> zeroinitializer, i64 127438, i32 1
+ %B23 = fdiv double %Sl17, 0.000000e+00
+ %Sl24 = select i1 %Cmp18, i32 420510, i32 492085
+ %Cmp25 = icmp ugt i1 %Cmp18, false
+ br i1 %Cmp25, label %CF80, label %CF83
+
+CF83: ; preds = %CF83, %CF80
+ %L26 = load i8* %0
+ store i8 -87, i8* %0
+ %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
+ %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+ %I29 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 492085, i32 1
+ %B30 = lshr <8 x i8> %I8, %I8
+ %FC31 = sitofp <4 x i32> %Shuff28 to <4 x double>
+ %Sl32 = select i1 false, <8 x i8> %I8, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %Cmp33 = icmp eq i64 %B, 116376
+ br i1 %Cmp33, label %CF83, label %CF88
+
+CF88: ; preds = %CF83
+ %L34 = load i8* %0
+ store i8 -87, i8* %0
+ %E35 = extractelement <8 x i64> %Shuff, i32 7
+ %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
+ %I37 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 0
+ %B38 = xor <8 x i64> %B9, %B9
+ %ZE = zext i32 0 to i64
+ %Sl39 = select i1 %Cmp33, i8 %L11, i8 %L5
+ %Cmp40 = icmp sgt i1 %Cmp, false
+ br i1 %Cmp40, label %CF80, label %CF81
+
+CF81: ; preds = %CF81, %CF85, %CF87, %CF88
+ %L41 = load i8* %0
+ store i8 %L34, i8* %0
+ %E42 = extractelement <8 x i64> %Shuff13, i32 6
+ %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7>
+ %I44 = insertelement <4 x i64> zeroinitializer, i64 116376, i32 3
+ %B45 = fsub float %FC16, 0x3AC86DCC40000000
+ %Tr = trunc <4 x i64> %I14 to <4 x i32>
+ %Sl46 = select i1 false, <8 x i64> %B38, <8 x i64> zeroinitializer
+ %Cmp47 = icmp sgt i1 %Cmp18, %Cmp18
+ br i1 %Cmp47, label %CF81, label %CF85
+
+CF85: ; preds = %CF81
+ %L48 = load i8* %0
+ store i8 -101, i8* %0
+ %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2
+ %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+ %I51 = insertelement <4 x i64> zeroinitializer, i64 %E20, i32 3
+ %B52 = or i32 336955, %Sl24
+ %FC53 = uitofp i8 %L48 to double
+ %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24
+ %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer
+ %L56 = load i8* %0
+ store i8 %L11, i8* %0
+ %E57 = extractelement <4 x i64> %Shuff21, i32 1
+ %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2>
+ %I59 = insertelement <4 x i64> zeroinitializer, i64 %E42, i32 2
+ %B60 = udiv <8 x i8> %Sl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %Tr61 = trunc i8 49 to i1
+ br i1 %Tr61, label %CF81, label %CF84
+
+CF84: ; preds = %CF84, %CF85
+ %Sl62 = select i1 false, i8 %L, i8 %L48
+ %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer
+ %L64 = load i8* %0
+ store i8 %5, i8* %0
+ %E65 = extractelement <8 x i1> %Cmp55, i32 0
+ br i1 %E65, label %CF84, label %CF87
+
+CF87: ; preds = %CF84
+ %Shuff66 = shufflevector <4 x i64> %Shuff21, <4 x i64> %I14, <4 x i32> <i32 3, i32 undef, i32 7, i32 1>
+ %I67 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %Sl54, i32 1
+ %B68 = frem double %B23, %Sl17
+ %ZE69 = zext <8 x i8> %Sl32 to <8 x i64>
+ %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12
+ %Cmp71 = icmp slt <8 x i64> %I, %Shuff
+ %L72 = load i8* %0
+ store i8 %L72, i8* %0
+ %E73 = extractelement <8 x i1> %Cmp55, i32 6
+ br i1 %E73, label %CF81, label %CF82
+
+CF82: ; preds = %CF82, %CF87
+ %Shuff74 = shufflevector <4 x i32> %I67, <4 x i32> %I29, <4 x i32> <i32 1, i32 3, i32 undef, i32 7>
+ %I75 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 3
+ %B76 = fsub double 0.000000e+00, %FC53
+ %Tr77 = trunc i32 %E to i8
+ %Sl78 = select i1 %Cmp18, i64* %A2, i64* %2
+ %Cmp79 = icmp eq i32 394647, 492085
+ br i1 %Cmp79, label %CF82, label %CF86
+
+CF86: ; preds = %CF82
+ store i64 %Sl70, i64* %Sl78
+ store i64 %E57, i64* %Sl78
+ store i64 %Sl70, i64* %Sl78
+ store i64 %B, i64* %Sl78
+ store i64 %Sl10, i64* %Sl78
+ ret void
+}