From e460807bcde46e9c72cf8c3d69fdfcb3906bbc0c Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 17 Jun 2015 04:02:32 +0000 Subject: [PATCH] Revert "AArch64: Use CMP;CCMP sequences for and/or/setcc trees." The patch triggers a miscompile on SPEC 2006 403.gcc with the (ref) 200.i and scilab.i inputs. I opened PR23866 to track analysis of this. This reverts commit r238793. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239880 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 214 ++++----------------- lib/Target/AArch64/AArch64ISelLowering.h | 7 - lib/Target/AArch64/AArch64InstrFormats.td | 77 +++----- lib/Target/AArch64/AArch64InstrInfo.td | 29 +-- test/CodeGen/AArch64/arm64-ccmp.ll | 40 ---- 5 files changed, 72 insertions(+), 295 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index b4ed3e0dec7..1fa266a87b9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -76,9 +76,6 @@ cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); -/// Value type used for condition codes. -static const MVT MVT_CC = MVT::i32; - AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -810,9 +807,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; - case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; - case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; - case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; @@ -1171,133 +1165,10 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS = LHS.getOperand(0); } - return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) .getValue(1); } -static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDValue CCOp, - SDValue Condition, unsigned NZCV, - SDLoc DL, SelectionDAG &DAG) { - unsigned Opcode = 0; - if (LHS.getValueType().isFloatingPoint()) - Opcode = AArch64ISD::FCCMP; - else if (RHS.getOpcode() == ISD::SUB) { - SDValue SubOp0 = RHS.getOperand(0); - if (const ConstantSDNode *SubOp0C = dyn_cast(SubOp0)) - if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // See emitComparison() on why we can only do this for SETEQ and SETNE. - Opcode = AArch64ISD::CCMN; - RHS = RHS.getOperand(1); - } - } - if (Opcode == 0) - Opcode = AArch64ISD::CCMP; - - SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); - return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); -} - -/// Returns true if @p Val is a tree of AND/OR/SETCC operations. -static bool isConjunctionDisjunctionTree(const SDValue Val, unsigned Depth) { - if (!Val.hasOneUse()) - return false; - if (Val->getOpcode() == ISD::SETCC) - return true; - // Protect against stack overflow. - if (Depth > 1000) - return false; - if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) { - SDValue O0 = Val->getOperand(0); - SDValue O1 = Val->getOperand(1); - return isConjunctionDisjunctionTree(O0, Depth+1) && - isConjunctionDisjunctionTree(O1, Depth+1); - } - return false; -} - -/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain -/// of CCMP/CFCMP ops. For example (SETCC_0 & SETCC_1) with condition cond0 and -/// cond1 can be transformed into "CMP; CCMP" with CCMP executing on cond_0 -/// and setting flags to inversed(cond_1) otherwise. -/// This recursive function produces DAG nodes that produce condition flags -/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC) -/// by testing the result for the condition set to @p OutCC. If @p Negate is -/// set the opposite truth value is produced. If @p CCOp and @p Condition are -/// given then conditional comparison are created so that false is reported -/// when they are false. -static SDValue emitConjunctionDisjunctionTree( - SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, - SDValue CCOp = SDValue(), AArch64CC::CondCode Condition = AArch64CC::AL) { - assert(isConjunctionDisjunctionTree(Val, 0)); - // We're at a tree leaf, produce a c?f?cmp. - unsigned Opcode = Val->getOpcode(); - if (Opcode == ISD::SETCC) { - SDValue LHS = Val->getOperand(0); - SDValue RHS = Val->getOperand(1); - ISD::CondCode CC = cast(Val->getOperand(2))->get(); - bool isInteger = LHS.getValueType().isInteger(); - if (Negate) - CC = getSetCCInverse(CC, isInteger); - SDLoc DL(Val); - // Determine OutCC and handle FP special case. - if (isInteger) { - OutCC = changeIntCCToAArch64CC(CC); - } else { - assert(LHS.getValueType().isFloatingPoint()); - AArch64CC::CondCode ExtraCC; - changeFPCCToAArch64CC(CC, OutCC, ExtraCC); - // Surpisingly some floating point conditions can't be tested with a - // single condition code. Construct an additional comparison in this case. - // See comment below on how we deal with OR conditions. - if (ExtraCC != AArch64CC::AL) { - SDValue ExtraCmp; - if (!CCOp.getNode()) - ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); - else { - SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC); - // Note that we want the inverse of ExtraCC, so NZCV is not inversed. - unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); - ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, - NZCV, DL, DAG); - } - CCOp = ExtraCmp; - Condition = AArch64CC::getInvertedCondCode(ExtraCC); - OutCC = AArch64CC::getInvertedCondCode(OutCC); - } - } - - // Produce a normal comparison if we are first in the chain - if (!CCOp.getNode()) - return emitComparison(LHS, RHS, CC, DL, DAG); - // Otherwise produce a ccmp. - SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC); - AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); - unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); - return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, - DAG); - } - - // Construct comparison sequence for the left hand side. - SDValue LHS = Val->getOperand(0); - SDValue RHS = Val->getOperand(1); - - // We can only implement AND-like behaviour here, but negation is free. So we - // use (not (and (not x) (not y))) to implement (or x y). - bool isOr = Val->getOpcode() == ISD::OR; - assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR."); - Negate ^= isOr; - - AArch64CC::CondCode RHSCC; - SDValue CmpR = - emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, isOr, CCOp, Condition); - SDValue CmpL = - emitConjunctionDisjunctionTree(DAG, LHS, OutCC, isOr, CmpR, RHSCC); - if (Negate) - OutCC = AArch64CC::getInvertedCondCode(OutCC); - return CmpL; -} - static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { SDValue Cmp; @@ -1356,55 +1227,47 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } } + // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. + // For the i8 operand, the largest immediate is 255, so this can be easily + // encoded in the compare instruction. For the i16 operand, however, the + // largest immediate cannot be encoded in the compare. + // Therefore, use a sign extending load and cmn to avoid materializing the -1 + // constant. For example, + // movz w1, #65535 + // ldrh w0, [x0, #0] + // cmp w0, w1 + // > + // ldrsh w0, [x0, #0] + // cmn w0, #1 + // Fundamental, we're relying on the property that (zext LHS) == (zext RHS) + // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure + // both the LHS and RHS are truely zero extended and to make sure the + // transformation is profitable. if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa(RHS)) { - const ConstantSDNode *RHSC = cast(RHS); - - // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. - // For the i8 operand, the largest immediate is 255, so this can be easily - // encoded in the compare instruction. For the i16 operand, however, the - // largest immediate cannot be encoded in the compare. - // Therefore, use a sign extending load and cmn to avoid materializing the - // -1 constant. For example, - // movz w1, #65535 - // ldrh w0, [x0, #0] - // cmp w0, w1 - // > - // ldrsh w0, [x0, #0] - // cmn w0, #1 - // Fundamental, we're relying on the property that (zext LHS) == (zext RHS) - // if and only if (sext LHS) == (sext RHS). The checks are in place to - // ensure both the LHS and RHS are truely zero extended and to make sure the - // transformation is profitable. - if ((RHSC->getZExtValue() >> 16 == 0) && isa(LHS) && - cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && - cast(LHS)->getMemoryVT() == MVT::i16 && - LHS.getNode()->hasNUsesOfValue(1, 0)) { - int16_t ValueofRHS = cast(RHS)->getZExtValue(); - if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { - SDValue SExt = - DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, - DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - goto CreateCCNode; + if ((cast(RHS)->getZExtValue() >> 16 == 0) && + isa(LHS)) { + if (cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && + cast(LHS)->getMemoryVT() == MVT::i16 && + LHS.getNode()->hasNUsesOfValue(1, 0)) { + int16_t ValueofRHS = cast(RHS)->getZExtValue(); + if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, + DAG.getValueType(MVT::i16)); + Cmp = emitComparison(SExt, + DAG.getConstant(ValueofRHS, dl, + RHS.getValueType()), + CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); + return Cmp; + } } } - - if ((RHSC->isNullValue() || RHSC->isOne()) && - isConjunctionDisjunctionTree(LHS, 0)) { - bool Negate = (CC == ISD::SETNE) ^ RHSC->isNullValue(); - Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate); - goto CreateCCNode; - } } - Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); - -CreateCCNode: - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); return Cmp; } @@ -9269,8 +9132,3 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { return Ty->isArrayTy(); } - -bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, - EVT) const { - return false; -} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index db192c78169..da42376ac25 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -58,11 +58,6 @@ enum NodeType : unsigned { SBCS, ANDS, - // Conditional compares. Operands: left,right,falsecc,cc,flags - CCMP, - CCMN, - FCCMP, - // Floating point comparison FCMP, @@ -513,8 +508,6 @@ private: bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; - - bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; }; namespace AArch64 { diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 1fe9c7f8cc5..2c52f340d6d 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -525,13 +525,6 @@ def imm0_31 : Operand, ImmLeaf, ImmLeaf { - let ParserMatchClass = Imm0_31Operand; -} - // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf, ImmLeaf, ImmLeaf { - let ParserMatchClass = Imm0_15Operand; -} +}]>; // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr @@ -2077,12 +2068,9 @@ multiclass LogicalRegS opc, bit N, string mnemonic, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondComparisonImm - : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), - mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", - [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), - (i32 imm:$cond), NZCV))]>, +class BaseCondSetFlagsImm + : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), + asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2102,13 +2090,19 @@ class BaseCondComparisonImm { + def Wi : BaseCondSetFlagsImm { + let Inst{31} = 0; + } + def Xi : BaseCondSetFlagsImm { + let Inst{31} = 1; + } +} + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondComparisonReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), - mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", - [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), - (i32 imm:$cond), NZCV))]>, +class BaseCondSetFlagsReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), + asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2128,19 +2122,11 @@ class BaseCondComparisonReg { - // immediate operand variants - def Wi : BaseCondComparisonImm { +multiclass CondSetFlagsReg { + def Wr : BaseCondSetFlagsReg { let Inst{31} = 0; } - def Xi : BaseCondComparisonImm { - let Inst{31} = 1; - } - // register operand variants - def Wr : BaseCondComparisonReg { - let Inst{31} = 0; - } - def Xr : BaseCondComparisonReg { + def Xr : BaseCondSetFlagsReg { let Inst{31} = 1; } } @@ -3948,14 +3934,11 @@ multiclass FPComparison pat> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), - mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, +class BaseFPCondComparison + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), + asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, Sched<[WriteFCmp]> { - let Uses = [NZCV]; - let Defs = [NZCV]; - bits<5> Rn; bits<5> Rm; bits<4> nzcv; @@ -3971,18 +3954,16 @@ class BaseFPCondComparison { - def Srr : BaseFPCondComparison { +multiclass FPCondComparison { + let Defs = [NZCV], Uses = [NZCV] in { + def Srr : BaseFPCondComparison { let Inst{22} = 0; } - def Drr : BaseFPCondComparison { + + def Drr : BaseFPCondComparison { let Inst{22} = 1; } + } // Defs = [NZCV], Uses = [NZCV] } //--- diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 2f1b8933bf6..653f80286b2 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -66,20 +66,6 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; -def SDT_AArch64CCMP : SDTypeProfile<1, 5, - [SDTCisVT<0, i32>, - SDTCisInt<1>, - SDTCisSameAs<1, 2>, - SDTCisInt<3>, - SDTCisInt<4>, - SDTCisVT<5, i32>]>; -def SDT_AArch64FCCMP : SDTypeProfile<1, 5, - [SDTCisVT<0, i32>, - SDTCisFP<1>, - SDTCisSameAs<1, 2>, - SDTCisInt<3>, - SDTCisInt<4>, - SDTCisVT<5, i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; @@ -174,10 +160,6 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; -def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; -def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; -def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; - def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; @@ -1036,10 +1018,13 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; //===----------------------------------------------------------------------===// -// Conditional comparison instructions. +// Conditionally set flags instructions. //===----------------------------------------------------------------------===// -defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; -defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; +defm CCMN : CondSetFlagsImm<0, "ccmn">; +defm CCMP : CondSetFlagsImm<1, "ccmp">; + +defm CCMN : CondSetFlagsReg<0, "ccmn">; +defm CCMP : CondSetFlagsReg<1, "ccmp">; //===----------------------------------------------------------------------===// // Conditional select instructions. @@ -2569,7 +2554,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; +defm FCCMP : FPCondComparison<0, "fccmp">; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index 11228c7e880..ff18f736433 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -287,43 +287,3 @@ sw.bb.i.i: %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16 br label %sw.bb.i.i } - -; CHECK-LABEL: select_and -define i64 @select_and(i32 %v1, i32 %v2, i64 %a, i64 %b) { -; CHECK: cmp -; CHECK: ccmp{{.*}}, #0, ne -; CHECK: csel{{.*}}, lt - %1 = icmp slt i32 %v1, %v2 - %2 = icmp ne i32 5, %v2 - %3 = and i1 %1, %2 - %sel = select i1 %3, i64 %a, i64 %b - ret i64 %sel -} - -; CHECK-LABEL: select_or -define i64 @select_or(i32 %v1, i32 %v2, i64 %a, i64 %b) { -; CHECK: cmp -; CHECK: ccmp{{.*}}, #8, eq -; CHECK: csel{{.*}}, lt - %1 = icmp slt i32 %v1, %v2 - %2 = icmp ne i32 5, %v2 - %3 = or i1 %1, %2 - %sel = select i1 %3, i64 %a, i64 %b - ret i64 %sel -} - -; CHECK-LABEL: select_complicated -define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) { -; CHECK: fcmp -; CHECK: fccmp{{.*}}, #4, ne -; CHECK: fccmp{{.*}}, #1, ne -; CHECK: fccmp{{.*}}, #4, vc -; CEHCK: csel{{.*}}, eq - %1 = fcmp one double %v1, %v2 - %2 = fcmp oeq double %v2, 13.0 - %3 = fcmp oeq double %v1, 42.0 - %or0 = or i1 %2, %3 - %or1 = or i1 %1, %or0 - %sel = select i1 %or1, i16 %a, i16 %b - ret i16 %sel -} -- 2.34.1