X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64FastISel.cpp;h=0ac4b39b0357245ace6fb3e5c7c6603d5a89f77a;hp=61017c1b110db66d0abd90a0ed10fdff11efee85;hb=bb6f14e3581c78509405a3d415e72821db8a2066;hpb=3b386bfcd2fcf97f9d56a4fed0639bf4887f7fcc diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 61017c1b110..0ac4b39b035 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -309,7 +310,7 @@ CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { } unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && + assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && "Alloca should always return a pointer."); // Don't handle dynamic allocas. @@ -419,7 +420,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); - EVT DestEVT = TLI.getValueType(GV->getType(), true); + EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); if (!DestEVT.isSimple()) return 0; @@ -458,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { } unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -522,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) U = C; } - if (const PointerType *Ty = dyn_cast(Obj->getType())) + if (auto *Ty = dyn_cast(Obj->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. @@ -537,13 +538,14 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) } case Instruction::IntToPtr: { // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } case Instruction::PtrToInt: { // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } @@ -664,20 +666,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) Addr.setExtendType(AArch64_AM::LSL); const Value *Src = U->getOperand(0); - if (const auto *I = dyn_cast(Src)) - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) - Src = I; - - // Fold the zext or sext when it won't become a noop. - if (const auto *ZE = dyn_cast(Src)) { - if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { - Addr.setExtendType(AArch64_AM::UXTW); - Src = ZE->getOperand(0); - } - } else if (const auto *SE = dyn_cast(Src)) { - if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { - Addr.setExtendType(AArch64_AM::SXTW); - Src = SE->getOperand(0); + if (const auto *I = dyn_cast(Src)) { + if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + // Fold the zext or sext when it won't become a noop. + if (const auto *ZE = dyn_cast(I)) { + if (!isIntExtFree(ZE) && + ZE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::UXTW); + Src = ZE->getOperand(0); + } + } else if (const auto *SE = dyn_cast(I)) { + if (!isIntExtFree(SE) && + SE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::SXTW); + Src = SE->getOperand(0); + } + } } } @@ -746,21 +750,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) Addr.setExtendType(AArch64_AM::LSL); const Value *Src = LHS; - if (const auto *I = dyn_cast(Src)) - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) - Src = I; - - - // Fold the zext or sext when it won't become a noop. - if (const auto *ZE = dyn_cast(Src)) { - if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { - Addr.setExtendType(AArch64_AM::UXTW); - Src = ZE->getOperand(0); - } - } else if (const auto *SE = dyn_cast(Src)) { - if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { - Addr.setExtendType(AArch64_AM::SXTW); - Src = SE->getOperand(0); + if (const auto *I = dyn_cast(Src)) { + if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + // Fold the zext or sext when it won't become a noop. + if (const auto *ZE = dyn_cast(I)) { + if (!isIntExtFree(ZE) && + ZE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::UXTW); + Src = ZE->getOperand(0); + } + } else if (const auto *SE = dyn_cast(I)) { + if (!isIntExtFree(SE) && + SE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::SXTW); + Src = SE->getOperand(0); + } + } } } @@ -875,13 +880,13 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. - if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } @@ -902,7 +907,7 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) @@ -964,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // Cannot encode an offset register and an immediate offset in the same // instruction. Fold the immediate offset into the load/store instruction and - // emit an additonal add to take care of the offset register. + // emit an additional add to take care of the offset register. if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) RegisterOffsetNeedsLowering = true; @@ -1053,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr, // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size // and alignment should be based on the VT. MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // Now add the rest of the operands. MIB.addFrameIndex(FI).addImm(Offset); } else { @@ -1173,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, } // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1188,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, - AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) { if (const auto *C = dyn_cast(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1209,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftType, ShiftVal, SetFlags, - WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } } } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1318,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrs, AArch64::SUBXrs }, { AArch64::ADDWrs, AArch64::ADDXrs } }, @@ -1355,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + if (ShiftImm >= 4) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrx, AArch64::SUBXrx }, { AArch64::ADDWrx, AArch64::ADDXrx } }, @@ -1386,7 +1405,7 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { Type *Ty = LHS->getType(); - EVT EVT = TLI.getValueType(Ty, true); + EVT EVT = TLI.getValueType(DL, Ty, true); if (!EVT.isSimple()) return false; MVT VT = EVT.getSimpleVT(); @@ -1537,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, return ResultReg; // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1553,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) if (const auto *C = dyn_cast(SI->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); @@ -1566,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1641,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, { AArch64::ORRWrs, AArch64::ORRXrs }, { AArch64::EORWrs, AArch64::EORXrs } }; + + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + const TargetRegisterClass *RC; unsigned Opc; switch (RetVT.SimpleTy) { @@ -1675,6 +1705,9 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, bool WantZExt, MachineMemOperand *MMO) { + if (!TLI.allowsMisalignedMemoryAccesses(VT)) + return 0; + // Simplify this down to something we can handle. if (!simplifyAddress(Addr, VT)) return 0; @@ -1917,7 +1950,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // could select it. Emit a copy to subreg if necessary. FastISel will remove // it when it selects the integer extend. unsigned Reg = lookUpRegForValue(IntExtVal); - if (!Reg) { + auto *MI = MRI.getUniqueVRegDef(Reg); + if (!MI) { if (RetVT == MVT::i64 && VT <= MVT::i32) { if (WantZExt) { // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). @@ -1935,10 +1969,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // The integer extend has already been emitted - delete all the instructions // that have been emitted by the integer extend lowering code and use the // result from the load instruction directly. - while (Reg) { - auto *MI = MRI.getUniqueVRegDef(Reg); - if (!MI) - break; + while (MI) { Reg = 0; for (auto &Opnd : MI->uses()) { if (Opnd.isReg()) { @@ -1947,6 +1978,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { } } MI->eraseFromParent(); + MI = nullptr; + if (Reg) + MI = MRI.getUniqueVRegDef(Reg); } updateValueMap(IntExtVal, ResultReg); return true; @@ -1958,6 +1992,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO) { + if (!TLI.allowsMisalignedMemoryAccesses(VT)) + return false; + // Simplify this down to something we can handle. if (!simplifyAddress(Addr, VT)) return false; @@ -2223,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { MIB.addImm(TestBit); MIB.addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - fastEmitBranch(FBB, DbgLoc); - + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2245,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2277,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. - CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2305,52 +2334,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const auto *CI = dyn_cast(BI->getCondition())) { @@ -2359,34 +2343,31 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) .addMBB(Target); - // Obtain the branch weight and add the target to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - Target->getBasicBlock()); - FuncInfo.MBB->addSuccessor(Target, BranchWeight); + // Obtain the branch probability and add the target to the successor list. + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + BI->getParent(), Target->getBasicBlock()); + FuncInfo.MBB->addSuccessor(Target, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; - - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - fastEmitBranch(FBB, DbgLoc); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2394,32 +2375,22 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. + unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2435,8 +2406,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); + for (auto *Succ : BI->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); return true; } @@ -2444,6 +2415,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { bool AArch64FastISel::selectCmp(const Instruction *I) { const CmpInst *CI = cast(I); + // Vectors of i1 are weird: bail out. + if (CI->getType()->isVectorTy()) + return false; + // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); unsigned ResultReg = 0; @@ -2572,7 +2547,7 @@ bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); Src1IsKill = true; } - unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg, + unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, Src1IsKill, Src2Reg, Src2IsKill); updateValueMap(SI, ResultReg); return true; @@ -2678,8 +2653,11 @@ bool AArch64FastISel::selectSelect(const Instruction *I) { return false; bool CondIsKill = hasTrivialKill(Cond); + const MCInstrDesc &II = TII.get(AArch64::ANDSWri); + CondReg = constrainOperandRegClass(II, CondReg, 1); + // Emit a TST instruction (ANDS wzr, reg, #imm). - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, AArch64::WZR) .addReg(CondReg, getKillRegState(CondIsKill)) .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); @@ -2747,7 +2725,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { if (SrcReg == 0) return false; - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); if (SrcVT == MVT::f128) return false; @@ -2783,7 +2761,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { return false; bool SrcIsKill = hasTrivialKill(I->getOperand(0)); - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { @@ -2842,7 +2820,7 @@ bool AArch64FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; @@ -2884,7 +2862,7 @@ bool AArch64FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); unsigned SrcReg; const TargetRegisterClass *RC; if (VT >= MVT::i1 && VT <= MVT::i32) { @@ -2939,8 +2917,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, .addImm(NumBytes); // Process the args. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; + for (CCValAssign &VA : ArgLocs) { const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; @@ -3003,8 +2980,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), - MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; @@ -3034,6 +3011,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); + + // TODO: Handle big-endian results + if (CopyVT.isVector() && !Subtarget->isLittleEndian()) + return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) @@ -3052,9 +3034,9 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + MCSymbol *Symbol = CLI.Symbol; - if (!Callee && !SymName) + if (!Callee && !Symbol) return false; // Allow SelectionDAG isel to handle tail calls. @@ -3116,8 +3098,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (CM == CodeModel::Small) { const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); - if (SymName) - MIB.addExternalSymbol(SymName, 0); + if (Symbol) + MIB.addSym(Symbol, 0); else if (Addr.getGlobalValue()) MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); else if (Addr.getReg()) { @@ -3127,18 +3109,18 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { return false; } else { unsigned CallReg = 0; - if (SymName) { + if (Symbol) { unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE); + .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); CallReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), - CallReg) - .addReg(ADRPReg) - .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::LDRXui), CallReg) + .addReg(ADRPReg) + .addSym(Symbol, + AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else if (Addr.getGlobalValue()) CallReg = materializeGV(Addr.getGlobalValue()); else if (Addr.getReg()) @@ -3158,7 +3140,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; @@ -3257,7 +3239,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, std::swap(LHS, RHS); // Simplify multiplies. - unsigned IID = II->getIntrinsicID(); + Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { default: break; @@ -3298,8 +3280,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -3442,7 +3424,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } CallLoweringInfo CLI; - CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(), + MCContext &Ctx = MF->getContext(); + CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), TLI.getLibcallName(LC), std::move(Args)); if (!lowerCallTo(CLI)) return false; @@ -3525,7 +3508,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { std::swap(LHS, RHS); // Simplify multiplies. - unsigned IID = II->getIntrinsicID(); + Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { default: break; @@ -3589,7 +3572,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { AArch64_AM::ASR, 31, /*WantResult=*/false); } else { assert(VT == MVT::i64 && "Unexpected value type."); - MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); + // LHSReg and RHSReg cannot be killed by this Mul, since they are + // reused in the next instruction. + MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, + /*IsKill=*/false); unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, RHSReg, RHSIsKill); emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, @@ -3618,7 +3604,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { AArch64::sub_32); } else { assert(VT == MVT::i64 && "Unexpected value type."); - MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); + // LHSReg and RHSReg cannot be killed by this Mul, since they are + // reused in the next instruction. + MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, + /*IsKill=*/false); unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, RHSReg, RHSIsKill); emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, @@ -3657,13 +3646,16 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (F.isVarArg()) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + // Build a list of return value registers. SmallVector RetRegs; if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; @@ -3698,7 +3690,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; @@ -3736,8 +3728,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned RetReg : RetRegs) + MIB.addReg(RetReg, RegState::Implicit); return true; } @@ -3746,8 +3738,8 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); Type *SrcTy = Op->getType(); - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); + EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); + EVT DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -4433,7 +4425,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) { } bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); + EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -4563,7 +4555,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) { unsigned ResultReg = 0; uint64_t ShiftVal = C->getZExtValue(); MVT SrcVT = RetVT; - bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true; + bool IsZExt = I->getOpcode() != Instruction::AShr; const Value *Op0 = I->getOperand(0); if (const auto *ZExt = dyn_cast(Op0)) { if (!isIntExtFree(ZExt)) { @@ -4710,7 +4702,8 @@ bool AArch64FastISel::selectFRem(const Instruction *I) { } CallLoweringInfo CLI; - CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(), + MCContext &Ctx = MF->getContext(); + CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), TLI.getLibcallName(LC), std::move(Args)); if (!lowerCallTo(CLI)) return false; @@ -4798,7 +4791,7 @@ std::pair AArch64FastISel::getRegForGEPIndex(const Value *Idx) { bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); @@ -4822,7 +4815,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { // into a single N = N + TotalOffset. uint64_t TotalOffs = 0; Type *Ty = I->getOperand(0)->getType(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DL); for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) { const Value *Idx = *OI; if (auto *StTy = dyn_cast(Ty)) {