From ae9a7964ef6fc98298f945ca370a63f4a3588477 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Tue, 19 Aug 2014 22:29:55 +0000
Subject: [PATCH] [FastISel][AArch64] Factor out ADDS/SUBS instruction emission
 and add support for extensions and shift folding.

Factor out the ADDS/SUBS instruction emission code into helper functions and
make the helper functions more clever to support most of the different
ADDS/SUBS instructions the architecture supports. This includes better
immediate support, shift folding, and sign-/zero-extend folding. An
illustrative IR sketch of the new folding is appended after the patch.

This fixes .

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216033 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64FastISel.cpp       | 621 ++++++++++++-------
 test/CodeGen/AArch64/arm64-fast-isel-icmp.ll | 148 +++--
 test/CodeGen/AArch64/arm64-xaluo.ll          |  14 +
 3 files changed, 483 insertions(+), 300 deletions(-)

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 25970b2378e..9f59f884dee 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -146,14 +146,46 @@ private:
   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                          const Value *Cond);

+  // Emit helper routines.
+  unsigned emitAddsSubs(bool UseAdds, MVT RetVT, const Value *LHS,
+                        const Value *RHS, bool IsZExt = false,
+                        bool WantResult = true);
+  unsigned emitAddsSubs_rr(bool UseAdds, MVT RetVT, unsigned LHSReg,
+                           bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                           bool WantResult = true);
+  unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
+                           bool LHSIsKill, uint64_t Imm,
+                           bool WantResult = true);
+  unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
+                           bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                           AArch64_AM::ShiftExtendType ShiftType,
+                           uint64_t ShiftImm, bool WantResult = true);
+  unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
+                           bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                           AArch64_AM::ShiftExtendType ExtType,
+                           uint64_t ShiftImm, bool WantResult = true);
+
   // Emit functions.
- bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt); + bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); + bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); + bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); + bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, MachineMemOperand *MMO = nullptr); bool EmitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO = nullptr); unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); + unsigned emitAdds(MVT RetVT, const Value *LHS, const Value *RHS, + bool IsZExt = false, bool WantResult = true); + unsigned emitSubs(MVT RetVT, const Value *LHS, const Value *RHS, + bool IsZExt = false, bool WantResult = true); + unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, + unsigned RHSReg, bool RHSIsKill, bool WantResult = true); + unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, + unsigned RHSReg, bool RHSIsKill, + AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, + bool WantResult = true); unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, @@ -737,6 +769,325 @@ void AArch64FastISel::AddLoadStoreOperands(Address &Addr, MIB.addMemOperand(MMO); } +unsigned AArch64FastISel::emitAddsSubs(bool UseAdds, MVT RetVT, + const Value *LHS, const Value *RHS, + bool IsZExt, bool WantResult) { + AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; + MVT SrcVT = RetVT; + switch (RetVT.SimpleTy) { + default: return 0; + case MVT::i1: + case MVT::i8: + ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; RetVT = MVT::i32; + break; + case MVT::i16: + ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; RetVT = MVT::i32; + break; + case MVT::i32: break; + case MVT::i64: break; + } + + // Canonicalize immediates to the RHS first. + if (UseAdds && isa(LHS) && !isa(RHS)) + std::swap(LHS, RHS); + + // Canonicalize shift immediate to the RHS. + if (UseAdds) + if (const auto *SI = dyn_cast(LHS)) + if (isa(SI->getOperand(1))) + if (SI->getOpcode() == Instruction::Shl || + SI->getOpcode() == Instruction::LShr || + SI->getOpcode() == Instruction::AShr ) + std::swap(LHS, RHS); + + unsigned LHSReg = getRegForValue(LHS); + if (!LHSReg) + return 0; + bool LHSIsKill = hasTrivialKill(LHS); + + if (ExtendType != AArch64_AM::InvalidShiftExtend) + LHSReg = EmitIntExt(SrcVT, LHSReg, RetVT, IsZExt); + + unsigned ResultReg = 0; + if (const auto *C = dyn_cast(RHS)) { + uint64_t Imm = IsZExt ? 
C->getZExtValue() : C->getSExtValue(); + if (C->isNegative()) + ResultReg = + emitAddsSubs_ri(!UseAdds, RetVT, LHSReg, LHSIsKill, -Imm, WantResult); + else + ResultReg = + emitAddsSubs_ri(UseAdds, RetVT, LHSReg, LHSIsKill, Imm, WantResult); + } + if (ResultReg) + return ResultReg; + + if (ExtendType != AArch64_AM::InvalidShiftExtend) { + if (const auto *SI = dyn_cast(RHS)) + if (const auto *C = dyn_cast(SI->getOperand(1))) + if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { + unsigned RHSReg = getRegForValue(SI->getOperand(0)); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); + return emitAddsSubs_rx(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ExtendType, C->getZExtValue(), + WantResult); + } + unsigned RHSReg = getRegForValue(RHS); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(RHS); + return emitAddsSubs_rx(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, + ExtendType, 0, WantResult); + } + + // Check if the shift can be folded into the instruction. + if (const auto *SI = dyn_cast(RHS)) { + if (const auto *C = dyn_cast(SI->getOperand(1))) { + AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; + switch (SI->getOpcode()) { + default: break; + case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; + case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; + case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; + } + uint64_t ShiftVal = C->getZExtValue(); + if (ShiftType != AArch64_AM::InvalidShiftExtend) { + unsigned RHSReg = getRegForValue(SI->getOperand(0)); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); + return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, WantResult); + } + } + } + + unsigned RHSReg = getRegForValue(RHS); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(RHS); + return emitAddsSubs_rr(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, + WantResult); +} + +unsigned AArch64FastISel::emitAddsSubs_rr(bool UseAdds, MVT RetVT, + unsigned LHSReg, bool LHSIsKill, + unsigned RHSReg, bool RHSIsKill, + bool WantResult) { + assert(LHSReg && RHSReg && "Invalid register number."); + + if (RetVT != MVT::i32 && RetVT != MVT::i64) + return 0; + + static const unsigned OpcTable[2][2] = { + { AArch64::ADDSWrr, AArch64::ADDSXrr }, + { AArch64::SUBSWrr, AArch64::SUBSXrr } + }; + unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)]; + unsigned ResultReg; + if (WantResult) + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + else + ResultReg = (RetVT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHSReg, getKillRegState(LHSIsKill)) + .addReg(RHSReg, getKillRegState(RHSIsKill)); + + return ResultReg; +} + +unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT, + unsigned LHSReg, bool LHSIsKill, + uint64_t Imm, bool WantResult) { + assert(LHSReg && "Invalid register number."); + + if (RetVT != MVT::i32 && RetVT != MVT::i64) + return 0; + + unsigned ShiftImm; + if (isUInt<12>(Imm)) + ShiftImm = 0; + else if ((Imm & 0xfff000) == Imm) { + ShiftImm = 12; + Imm >>= 12; + } else + return 0; + + static const unsigned OpcTable[2][2] = { + { AArch64::ADDSWri, AArch64::ADDSXri }, + { AArch64::SUBSWri, AArch64::SUBSXri } + }; + unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)]; + unsigned ResultReg; + if (WantResult) + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + else + ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHSReg, getKillRegState(LHSIsKill)) + .addImm(Imm) + .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); + + return ResultReg; +} + +unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT, + unsigned LHSReg, bool LHSIsKill, + unsigned RHSReg, bool RHSIsKill, + AArch64_AM::ShiftExtendType ShiftType, + uint64_t ShiftImm, bool WantResult) { + assert(LHSReg && RHSReg && "Invalid register number."); + + if (RetVT != MVT::i32 && RetVT != MVT::i64) + return 0; + + static const unsigned OpcTable[2][2] = { + { AArch64::ADDSWrs, AArch64::ADDSXrs }, + { AArch64::SUBSWrs, AArch64::SUBSXrs } + }; + unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)]; + unsigned ResultReg; + if (WantResult) + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + else + ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHSReg, getKillRegState(LHSIsKill)) + .addReg(RHSReg, getKillRegState(RHSIsKill)) + .addImm(getShifterImm(ShiftType, ShiftImm)); + + return ResultReg; +} + +unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT, + unsigned LHSReg, bool LHSIsKill, + unsigned RHSReg, bool RHSIsKill, + AArch64_AM::ShiftExtendType ExtType, + uint64_t ShiftImm, bool WantResult) { + assert(LHSReg && RHSReg && "Invalid register number."); + + if (RetVT != MVT::i32 && RetVT != MVT::i64) + return 0; + + static const unsigned OpcTable[2][2] = { + { AArch64::ADDSWrx, AArch64::ADDSXrx }, + { AArch64::SUBSWrx, AArch64::SUBSXrx } + }; + unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)]; + unsigned ResultReg; + if (WantResult) + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + else + ResultReg = (RetVT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHSReg, getKillRegState(LHSIsKill)) + .addReg(RHSReg, getKillRegState(RHSIsKill)) + .addImm(getArithExtendImm(ExtType, ShiftImm)); + + return ResultReg; +} + +bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { + Type *Ty = LHS->getType(); + EVT EVT = TLI.getValueType(Ty, true); + if (!EVT.isSimple()) + return false; + MVT VT = EVT.getSimpleVT(); + + switch (VT.SimpleTy) { + default: + return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + return emitICmp(VT, LHS, RHS, IsZExt); + case MVT::f32: + case MVT::f64: + return emitFCmp(VT, LHS, RHS); + } +} + +bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, + bool IsZExt) { + return emitSubs(RetVT, LHS, RHS, IsZExt, /*WantResult=*/false) != 0; +} + +bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, + uint64_t Imm) { + return emitAddsSubs_ri(false, RetVT, LHSReg, LHSIsKill, Imm, + /*WantResult=*/false) != 0; +} + +bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { + if (RetVT != MVT::f32 && RetVT != MVT::f64) + return false; + + // Check to see if the 2nd operand is a constant that we can encode directly + // in the compare. + bool UseImm = false; + if (const auto *CFP = dyn_cast(RHS)) + if (CFP->isZero() && !CFP->isNegative()) + UseImm = true; + + unsigned LHSReg = getRegForValue(LHS); + if (!LHSReg) + return false; + bool LHSIsKill = hasTrivialKill(LHS); + + if (UseImm) { + unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(LHSReg, getKillRegState(LHSIsKill)); + return true; + } + + unsigned RHSReg = getRegForValue(RHS); + if (!RHSReg) + return false; + bool RHSIsKill = hasTrivialKill(RHS); + + unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(LHSReg, getKillRegState(LHSIsKill)) + .addReg(RHSReg, getKillRegState(RHSIsKill)); + return true; +} + +unsigned AArch64FastISel::emitAdds(MVT RetVT, const Value *LHS, + const Value *RHS, bool IsZExt, + bool WantResult) { + return emitAddsSubs(true, RetVT, LHS, RHS, IsZExt, WantResult); +} + +unsigned AArch64FastISel::emitSubs(MVT RetVT, const Value *LHS, + const Value *RHS, bool IsZExt, + bool WantResult) { + return emitAddsSubs(false, RetVT, LHS, RHS, IsZExt, WantResult); +} + +unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, + bool LHSIsKill, unsigned RHSReg, + bool RHSIsKill, bool WantResult) { + return emitAddsSubs_rr(false, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, + WantResult); +} + +unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, + bool LHSIsKill, unsigned RHSReg, + bool RHSIsKill, + AArch64_AM::ShiftExtendType ShiftType, + uint64_t ShiftImm, bool WantResult) { + return emitAddsSubs_rs(false, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, + ShiftType, ShiftImm, WantResult); +} + bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, MachineMemOperand *MMO) { // Simplify this down to something we can handle. @@ -998,7 +1349,7 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) { return false; // Emit the cmp. 
- if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; // Emit the branch. @@ -1035,11 +1386,7 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) { TII.get(AArch64::ANDWri), ANDReg) .addReg(CondReg) .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBSWri), AArch64::WZR) - .addReg(ANDReg) - .addImm(0) - .addImm(0); + emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); @@ -1099,6 +1446,7 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) { unsigned CondReg = getRegForValue(BI->getCondition()); if (CondReg == 0) return false; + bool CondRegIsKill = hasTrivialKill(BI->getCondition()); // We've been divorced from our compare! Our block was split, and // now our compare lives in a predecessor block. We musn't @@ -1107,11 +1455,7 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) { // Regardless, the compare has been done in the predecessor block, // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri), - AArch64::WZR) - .addReg(CondReg) - .addImm(0) - .addImm(0); + emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); @@ -1150,118 +1494,6 @@ bool AArch64FastISel::SelectIndirectBr(const Instruction *I) { return true; } -bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { - Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Check to see if the 2nd operand is a constant that we can encode directly - // in the compare. - uint64_t Imm; - bool UseImm = false; - bool isNegativeImm = false; - if (const ConstantInt *ConstInt = dyn_cast(Src2Value)) { - if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || - SrcVT == MVT::i8 || SrcVT == MVT::i1) { - const APInt &CIVal = ConstInt->getValue(); - - Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue(); - if (CIVal.isNegative()) { - isNegativeImm = true; - Imm = -Imm; - } - // FIXME: We can handle more immediates using shifts. - UseImm = ((Imm & 0xfff) == Imm); - } - } else if (const ConstantFP *ConstFP = dyn_cast(Src2Value)) { - if (SrcVT == MVT::f32 || SrcVT == MVT::f64) - if (ConstFP->isZero() && !ConstFP->isNegative()) - UseImm = true; - } - - unsigned ZReg; - unsigned CmpOpc; - bool isICmp = true; - bool needsExt = false; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - needsExt = true; - // Intentional fall-through. - case MVT::i32: - ZReg = AArch64::WZR; - if (UseImm) - CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri; - else - CmpOpc = AArch64::SUBSWrr; - break; - case MVT::i64: - ZReg = AArch64::XZR; - if (UseImm) - CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri; - else - CmpOpc = AArch64::SUBSXrr; - break; - case MVT::f32: - isICmp = false; - CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr; - break; - case MVT::f64: - isICmp = false; - CmpOpc = UseImm ? 
AArch64::FCMPDri : AArch64::FCMPDrr; - break; - } - - unsigned SrcReg1 = getRegForValue(Src1Value); - if (SrcReg1 == 0) - return false; - - unsigned SrcReg2; - if (!UseImm) { - SrcReg2 = getRegForValue(Src2Value); - if (SrcReg2 == 0) - return false; - } - - // We have i1, i8, or i16, we need to either zero extend or sign extend. - if (needsExt) { - SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); - if (SrcReg1 == 0) - return false; - if (!UseImm) { - SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); - if (SrcReg2 == 0) - return false; - } - } - - if (isICmp) { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), ZReg) - .addReg(SrcReg1) - .addImm(Imm) - .addImm(0); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), ZReg) - .addReg(SrcReg1) - .addReg(SrcReg2); - } else { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1) - .addReg(SrcReg2); - } - return true; -} - bool AArch64FastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast(I); @@ -1271,7 +1503,7 @@ bool AArch64FastISel::SelectCmp(const Instruction *I) { return false; // Emit the cmp. - if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; // Now set a register based on the comparison. @@ -1326,12 +1558,7 @@ bool AArch64FastISel::SelectSelect(const Instruction *I) { ANDReg) .addReg(CondReg, getKillRegState(CondIsKill)) .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri), - AArch64::WZR) - .addReg(ANDReg) - .addImm(0) - .addImm(0); + emitICmp_ri(MVT::i32, ANDReg, true, 0); } unsigned TrueReg = getRegForValue(SI->getTrueValue()); @@ -2058,57 +2285,29 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) { isCommutativeIntrinsic(II)) std::swap(LHS, RHS); - unsigned LHSReg = getRegForValue(LHS); - if (!LHSReg) - return false; - bool LHSIsKill = hasTrivialKill(LHS); - - // Check if the immediate can be encoded in the instruction and if we should - // invert the instruction (adds -> subs) to handle negative immediates. 
- bool UseImm = false; - bool UseInverse = false; - uint64_t Imm = 0; - if (const auto *C = dyn_cast(RHS)) { - if (C->isNegative()) { - UseInverse = true; - Imm = -(C->getSExtValue()); - } else - Imm = C->getZExtValue(); - - if (isUInt<12>(Imm)) - UseImm = true; - - UseInverse = UseImm && UseInverse; - } - - static const unsigned OpcTable[2][2][2] = { - { {AArch64::ADDSWrr, AArch64::ADDSXrr}, - {AArch64::ADDSWri, AArch64::ADDSXri} }, - { {AArch64::SUBSWrr, AArch64::SUBSXrr}, - {AArch64::SUBSWri, AArch64::SUBSXri} } - }; - unsigned Opc = 0; - unsigned MulReg = 0; - unsigned RHSReg = 0; - bool RHSIsKill = false; + unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; AArch64CC::CondCode CC = AArch64CC::Invalid; - bool Is64Bit = VT == MVT::i64; switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::sadd_with_overflow: - Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break; + ResultReg1 = emitAdds(VT, LHS, RHS); CC = AArch64CC::VS; break; case Intrinsic::uadd_with_overflow: - Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break; + ResultReg1 = emitAdds(VT, LHS, RHS); CC = AArch64CC::HS; break; case Intrinsic::ssub_with_overflow: - Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break; + ResultReg1 = emitSubs(VT, LHS, RHS); CC = AArch64CC::VS; break; case Intrinsic::usub_with_overflow: - Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break; + ResultReg1 = emitSubs(VT, LHS, RHS); CC = AArch64CC::LO; break; case Intrinsic::smul_with_overflow: { CC = AArch64CC::NE; - RHSReg = getRegForValue(RHS); + unsigned LHSReg = getRegForValue(LHS); + if (!LHSReg) + return false; + bool LHSIsKill = hasTrivialKill(LHS); + + unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) return false; - RHSIsKill = hasTrivialKill(RHS); + bool RHSIsKill = hasTrivialKill(RHS); if (VT == MVT::i32) { MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); @@ -2117,41 +2316,35 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) { AArch64::sub_32); ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, AArch64::sub_32); - unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBSWrs), CmpReg) - .addReg(ShiftReg, getKillRegState(true)) - .addReg(MulReg, getKillRegState(false)) - .addImm(159); // 159 <-> asr #31 + emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, + AArch64_AM::ASR, 31, /*WantResult=*/false); } else { assert(VT == MVT::i64 && "Unexpected value type."); MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, RHSReg, RHSIsKill); - unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBSXrs), CmpReg) - .addReg(SMULHReg, getKillRegState(true)) - .addReg(MulReg, getKillRegState(false)) - .addImm(191); // 191 <-> asr #63 + emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, + AArch64_AM::ASR, 63, /*WantResult=*/false); } break; } case Intrinsic::umul_with_overflow: { CC = AArch64CC::NE; - RHSReg = getRegForValue(RHS); + unsigned LHSReg = getRegForValue(LHS); + if (!LHSReg) + return false; + bool LHSIsKill = hasTrivialKill(LHS); + + unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) return false; - RHSIsKill = hasTrivialKill(RHS); + bool RHSIsKill = hasTrivialKill(RHS); if (VT == MVT::i32) { 
MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); - unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBSXrs), CmpReg) - .addReg(AArch64::XZR, getKillRegState(true)) - .addReg(MulReg, getKillRegState(false)) - .addImm(96); // 96 <-> lsr #32 + emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, + /*IsKill=*/false, AArch64_AM::LSR, 32, + /*WantResult=*/false); MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, AArch64::sub_32); } else { @@ -2159,49 +2352,29 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) { MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, RHSReg, RHSIsKill); - unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBSXrr), CmpReg) - .addReg(AArch64::XZR, getKillRegState(true)) - .addReg(UMULHReg, getKillRegState(false)); + emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, + /*IsKill=*/false, /*WantResult=*/false); } break; } } - if (!UseImm) { - RHSReg = getRegForValue(RHS); - if (!RHSReg) - return false; - RHSIsKill = hasTrivialKill(RHS); - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - if (Opc) { - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), - ResultReg) - .addReg(LHSReg, getKillRegState(LHSIsKill)); - if (UseImm) { - MIB.addImm(Imm); - MIB.addImm(0); - } else - MIB.addReg(RHSReg, getKillRegState(RHSIsKill)); - } - else + if (MulReg) { + ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(MulReg); + TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); + } - unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy); - assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); + ResultReg2 = FuncInfo.CreateRegs(CondTy); + assert((ResultReg1 + 1) == ResultReg2 && + "Nonconsecutive result registers."); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), ResultReg2) - .addReg(AArch64::WZR, getKillRegState(true)) - .addReg(AArch64::WZR, getKillRegState(true)) - .addImm(getInvertedCondCode(CC)); + .addReg(AArch64::WZR, getKillRegState(true)) + .addReg(AArch64::WZR, getKillRegState(true)) + .addImm(getInvertedCondCode(CC)); - UpdateValueMap(II, ResultReg, 2); + UpdateValueMap(II, ResultReg1, 2); return true; } } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll index 971be5c4346..bd1666bfb84 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s +; RUN: llc -O0 -fast-isel-abort -mtriple=arm64-apple-darwin < %s | FileCheck %s define i32 @icmp_eq_imm(i32 %a) nounwind ssp { entry: -; CHECK: icmp_eq_imm -; CHECK: cmp w0, #31 -; CHECK: cset w0, eq +; CHECK-LABEL: icmp_eq_imm +; CHECK: cmp w0, #31 +; CHECK-NEXT: cset w0, eq %cmp = icmp eq i32 %a, 31 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -12,19 +12,19 @@ entry: define i32 @icmp_eq_neg_imm(i32 %a) nounwind ssp { entry: -; CHECK: icmp_eq_neg_imm -; CHECK: cmn w0, #7 -; CHECK: cset w0, eq +; CHECK-LABEL: icmp_eq_neg_imm +; CHECK: cmn w0, #7 +; CHECK-NEXT: cset w0, eq %cmp = icmp eq i32 %a, -7 %conv 
= zext i1 %cmp to i32 ret i32 %conv } -define i32 @icmp_eq(i32 %a, i32 %b) nounwind ssp { +define i32 @icmp_eq_i32(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_eq -; CHECK: cmp w0, w1 -; CHECK: cset w0, eq +; CHECK-LABEL: icmp_eq_i32 +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, eq %cmp = icmp eq i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -32,9 +32,9 @@ entry: define i32 @icmp_ne(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_ne -; CHECK: cmp w0, w1 -; CHECK: cset w0, ne +; CHECK-LABEL: icmp_ne +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, ne %cmp = icmp ne i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -42,9 +42,9 @@ entry: define i32 @icmp_ugt(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_ugt -; CHECK: cmp w0, w1 -; CHECK: cset w0, hi +; CHECK-LABEL: icmp_ugt +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, hi %cmp = icmp ugt i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -52,9 +52,9 @@ entry: define i32 @icmp_uge(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_uge -; CHECK: cmp w0, w1 -; CHECK: cset w0, hs +; CHECK-LABEL: icmp_uge +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, hs %cmp = icmp uge i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -62,9 +62,9 @@ entry: define i32 @icmp_ult(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_ult -; CHECK: cmp w0, w1 -; CHECK: cset w0, lo +; CHECK-LABEL: icmp_ult +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, lo %cmp = icmp ult i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -72,9 +72,9 @@ entry: define i32 @icmp_ule(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_ule -; CHECK: cmp w0, w1 -; CHECK: cset w0, ls +; CHECK-LABEL: icmp_ule +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, ls %cmp = icmp ule i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -82,9 +82,9 @@ entry: define i32 @icmp_sgt(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_sgt -; CHECK: cmp w0, w1 -; CHECK: cset w0, gt +; CHECK-LABEL: icmp_sgt +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, gt %cmp = icmp sgt i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -92,9 +92,9 @@ entry: define i32 @icmp_sge(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_sge -; CHECK: cmp w0, w1 -; CHECK: cset w0, ge +; CHECK-LABEL: icmp_sge +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, ge %cmp = icmp sge i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -102,9 +102,9 @@ entry: define i32 @icmp_slt(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_slt -; CHECK: cmp w0, w1 -; CHECK: cset w0, lt +; CHECK-LABEL: icmp_slt +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, lt %cmp = icmp slt i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -112,9 +112,9 @@ entry: define i32 @icmp_sle(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK: icmp_sle -; CHECK: cmp w0, w1 -; CHECK: cset w0, le +; CHECK-LABEL: icmp_sle +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, le %cmp = icmp sle i32 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -122,9 +122,9 @@ entry: define i32 @icmp_i64(i64 %a, i64 %b) nounwind ssp { entry: -; CHECK: icmp_i64 -; CHECK: cmp x0, x1 -; CHECK: cset w{{[0-9]+}}, le +; CHECK-LABEL: icmp_i64 +; CHECK: cmp x0, x1 +; CHECK-NEXT: cset w{{[0-9]+}}, le %cmp = icmp sle i64 %a, %b %conv = zext i1 %cmp to i32 ret i32 %conv @@ -132,33 +132,30 @@ entry: define zeroext i1 @icmp_eq_i16(i16 %a, i16 %b) nounwind ssp { entry: -; CHECK: icmp_eq_i16 -; CHECK: sxth w0, w0 -; CHECK: sxth w1, w1 -; CHECK: cmp w0, w1 -; CHECK: cset w0, eq +; CHECK-LABEL: icmp_eq_i16 +; CHECK: sxth w0, w0 +; CHECK: cmp w0, w1, sxth +; 
CHECK-NEXT: cset w0, eq %cmp = icmp eq i16 %a, %b ret i1 %cmp } define zeroext i1 @icmp_eq_i8(i8 %a, i8 %b) nounwind ssp { entry: -; CHECK: icmp_eq_i8 -; CHECK: sxtb w0, w0 -; CHECK: sxtb w1, w1 -; CHECK: cmp w0, w1 -; CHECK: cset w0, eq +; CHECK-LABEL: icmp_eq_i8 +; CHECK: sxtb w0, w0 +; CHECK-NEXT: cmp w0, w1, sxtb +; CHECK-NEXT: cset w0, eq %cmp = icmp eq i8 %a, %b ret i1 %cmp } define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind { entry: -; CHECK: icmp_i16_unsigned -; CHECK: uxth w0, w0 -; CHECK: uxth w1, w1 -; CHECK: cmp w0, w1 -; CHECK: cset w0, lo +; CHECK-LABEL: icmp_i16_unsigned +; CHECK: uxth w0, w0 +; CHECK-NEXT: cmp w0, w1, uxth +; CHECK-NEXT: cset w0, lo %cmp = icmp ult i16 %a, %b %conv2 = zext i1 %cmp to i32 ret i32 %conv2 @@ -166,11 +163,10 @@ entry: define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind { entry: -; CHECK: @icmp_i8_signed -; CHECK: sxtb w0, w0 -; CHECK: sxtb w1, w1 -; CHECK: cmp w0, w1 -; CHECK: cset w0, gt +; CHECK-LABEL: icmp_i8_signed +; CHECK: sxtb w0, w0 +; CHECK-NEXT: cmp w0, w1, sxtb +; CHECK-NEXT: cset w0, gt %cmp = icmp sgt i8 %a, %b %conv2 = zext i1 %cmp to i32 ret i32 %conv2 @@ -179,11 +175,11 @@ entry: define i32 @icmp_i16_signed_const(i16 %a) nounwind { entry: -; CHECK: icmp_i16_signed_const -; CHECK: sxth w0, w0 -; CHECK: cmn w0, #233 -; CHECK: cset w0, lt -; CHECK: and w0, w0, #0x1 +; CHECK-LABEL: icmp_i16_signed_const +; CHECK: sxth w0, w0 +; CHECK-NEXT: cmn w0, #233 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: and w0, w0, #0x1 %cmp = icmp slt i16 %a, -233 %conv2 = zext i1 %cmp to i32 ret i32 %conv2 @@ -191,11 +187,11 @@ entry: define i32 @icmp_i8_signed_const(i8 %a) nounwind { entry: -; CHECK: icmp_i8_signed_const -; CHECK: sxtb w0, w0 -; CHECK: cmp w0, #124 -; CHECK: cset w0, gt -; CHECK: and w0, w0, #0x1 +; CHECK-LABEL: icmp_i8_signed_const +; CHECK: sxtb w0, w0 +; CHECK-NEXT: cmp w0, #124 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: and w0, w0, #0x1 %cmp = icmp sgt i8 %a, 124 %conv2 = zext i1 %cmp to i32 ret i32 %conv2 @@ -203,11 +199,11 @@ entry: define i32 @icmp_i1_unsigned_const(i1 %a) nounwind { entry: -; CHECK: icmp_i1_unsigned_const -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: cset w0, lo -; CHECK: and w0, w0, #0x1 +; CHECK-LABEL: icmp_i1_unsigned_const +; CHECK: and w0, w0, #0x1 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: and w0, w0, #0x1 %cmp = icmp ult i1 %a, 0 %conv2 = zext i1 %cmp to i32 ret i32 %conv2 diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll index fe81d8d8deb..71300c4dd2c 100644 --- a/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/test/CodeGen/AArch64/arm64-xaluo.ll @@ -55,6 +55,20 @@ entry: ret i1 %obit } +; Test shift folding. +define zeroext i1 @saddo5.i32(i32 %v1, i32 %v2, i32* %res) { +entry: +; CHECK-LABEL: saddo5.i32 +; CHECK: adds {{w[0-9]+}}, w0, w1 +; CHECK-NEXT: cset {{w[0-9]+}}, vs + %lsl = shl i32 %v2, 16 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %lsl) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32* %res + ret i1 %obit +} + define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) { entry: ; CHECK-LABEL: saddo1.i64 -- 2.34.1
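For reference, below is a minimal IR sketch of the two folding patterns the new
emitAddsSubs helpers enable, modeled on the icmp_eq_i8 and saddo5.i32 tests
above. The function names are illustrative, and the expected instructions in
the comments are approximate, not part of the committed patch.

; Sketch: a signed compare of two i8 values now extends only the LHS and folds
; the RHS extension into the compare itself via the extended-register form
; (roughly "sxtb w0, w0" followed by "cmp w0, w1, sxtb"), instead of
; sign-extending both operands first.
define zeroext i1 @icmp_slt_i8_sketch(i8 %a, i8 %b) {
entry:
  %cmp = icmp slt i8 %a, %b
  ret i1 %cmp
}

; Sketch: a constant shift feeding an overflow intrinsic is folded into the
; flag-setting add via the shifted-register form
; (roughly "adds w8, w0, w1, lsl #16").
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)

define zeroext i1 @saddo_shift_sketch(i32 %v1, i32 %v2) {
entry:
  %lsl = shl i32 %v2, 16
  %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %lsl)
  %obit = extractvalue { i32, i1 } %t, 1
  ret i1 %obit
}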