X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64FastISel.cpp;h=2f50480efbe23cd2fa208c5bdea63ea5b85b8e75;hb=d4f04a7e27ac8e24c47dd646b293e305881ea336;hp=e64f1bdacd8f61e99c1ad75938cf7d8ce183cff2;hpb=e7fba004ce02b6adbcc2e04d6adde18d63d0f228;p=oota-llvm.git diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index e64f1bdacd8..2f50480efbe 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AArch64.h" +#include "AArch64CallingConvention.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -35,6 +36,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -78,11 +80,9 @@ class AArch64FastISel final : public FastISel { return Base.Reg; } void setOffsetReg(unsigned Reg) { - assert(isRegBase() && "Invalid offset register access!"); OffsetReg = Reg; } unsigned getOffsetReg() const { - assert(isRegBase() && "Invalid offset register access!"); return OffsetReg; } void setFI(unsigned FI) { @@ -134,6 +134,7 @@ private: bool selectBitCast(const Instruction *I); bool selectFRem(const Instruction *I); bool selectSDiv(const Instruction *I); + bool selectGetElementPtr(const Instruction *I); // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); @@ -150,6 +151,9 @@ private: unsigned Alignment); bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, const Value *Cond); + bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); + bool optimizeSelect(const SelectInst *SI); + std::pair getRegForGEPIndex(const Value *Idx); // Emit helper routines. unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, @@ -173,12 +177,13 @@ private: bool WantResult = true); // Emit functions. + bool emitCompareAndBranch(const BranchInst *BI); bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); - bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, - MachineMemOperand *MMO = nullptr); + unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, + MachineMemOperand *MMO = nullptr); bool emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO = nullptr); unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); @@ -186,6 +191,7 @@ private: unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, bool SetFlags = false, bool WantResult = true, bool IsZExt = false); + unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm); unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, bool SetFlags = false, bool WantResult = true, bool IsZExt = false); @@ -240,9 +246,10 @@ public: unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, - const TargetLibraryInfo *LibInfo) + const TargetLibraryInfo *LibInfo) : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { - Subtarget = &TM.getSubtarget(); + Subtarget = + &static_cast(FuncInfo.MF->getSubtarget()); Context = &FuncInfo.Fn->getContext(); } @@ -255,14 +262,55 @@ public: #include "AArch64GenCallingConv.inc" +/// \brief Check if the sign-/zero-extend will be a noop. +static bool isIntExtFree(const Instruction *I) { + assert((isa(I) || isa(I)) && + "Unexpected integer extend instruction."); + assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && + "Unexpected value type."); + bool IsZExt = isa(I); + + if (const auto *LI = dyn_cast(I->getOperand(0))) + if (LI->hasOneUse()) + return true; + + if (const auto *Arg = dyn_cast(I->getOperand(0))) + if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) + return true; + + return false; +} + +/// \brief Determine the implicit scale factor that is applied by a memory +/// operation for a given value type. +static unsigned getImplicitScaleFactor(MVT VT) { + switch (VT.SimpleTy) { + default: + return 0; // invalid + case MVT::i1: // fall-through + case MVT::i8: + return 1; + case MVT::i16: + return 2; + case MVT::i32: // fall-through + case MVT::f32: + return 4; + case MVT::i64: // fall-through + case MVT::f64: + return 8; + } +} + CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { if (CC == CallingConv::WebKit_JS) return CC_AArch64_WebKit_JS; + if (CC == CallingConv::GHC) + return CC_AArch64_GHC; return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; } unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && + assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && "Alloca should always return a pointer."); // Don't handle dynamic allocas. @@ -323,6 +371,24 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); } + // For the MachO large code model materialize the FP constant in code. + if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { + unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; + const TargetRegisterClass *RC = Is64Bit ? + &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + + unsigned TmpReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) + .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(TmpReg, getKillRegState(true)); + + return ResultReg; + } + // Materialize via constant pool. MachineConstantPool wants an explicit // alignment. unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); @@ -354,7 +420,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); - EVT DestEVT = TLI.getValueType(GV->getType(), true); + EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); if (!DestEVT.isSimple()) return 0; @@ -393,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { } unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -457,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) U = C; } - if (const PointerType *Ty = dyn_cast(Obj->getType())) + if (auto *Ty = dyn_cast(Obj->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. @@ -472,13 +538,14 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) } case Instruction::IntToPtr: { // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } case Instruction::PtrToInt: { // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } @@ -550,7 +617,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) std::swap(LHS, RHS); if (const ConstantInt *CI = dyn_cast(RHS)) { - Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue()); + Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); return computeAddress(LHS, Addr, Ty); } @@ -561,71 +628,92 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) break; } - case Instruction::Shl: + case Instruction::Sub: { + // Subs of constants are common and easy enough. + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + if (const ConstantInt *CI = dyn_cast(RHS)) { + Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); + return computeAddress(LHS, Addr, Ty); + } + break; + } + case Instruction::Shl: { if (Addr.getOffsetReg()) break; - if (const auto *CI = dyn_cast(U->getOperand(1))) { - unsigned Val = CI->getZExtValue(); - if (Val < 1 || Val > 3) - break; + const auto *CI = dyn_cast(U->getOperand(1)); + if (!CI) + break; - uint64_t NumBytes = 0; - if (Ty && Ty->isSized()) { - uint64_t NumBits = DL.getTypeSizeInBits(Ty); - NumBytes = NumBits / 8; - if (!isPowerOf2_64(NumBits)) - NumBytes = 0; - } + unsigned Val = CI->getZExtValue(); + if (Val < 1 || Val > 3) + break; - if (NumBytes != (1ULL << Val)) - break; + uint64_t NumBytes = 0; + if (Ty && Ty->isSized()) { + uint64_t NumBits = DL.getTypeSizeInBits(Ty); + NumBytes = NumBits / 8; + if (!isPowerOf2_64(NumBits)) + NumBytes = 0; + } - Addr.setShift(Val); - Addr.setExtendType(AArch64_AM::LSL); - - if (const auto *I = dyn_cast(U->getOperand(0))) - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) - U = I; - - if (const auto *ZE = dyn_cast(U)) - if (ZE->getOperand(0)->getType()->isIntegerTy(32)) - Addr.setExtendType(AArch64_AM::UXTW); - - if (const auto *SE = dyn_cast(U)) - if (SE->getOperand(0)->getType()->isIntegerTy(32)) - Addr.setExtendType(AArch64_AM::SXTW); - - if (const auto *AI = dyn_cast(U)) - if (AI->getOpcode() == Instruction::And) { - const Value *LHS = AI->getOperand(0); - const Value *RHS = AI->getOperand(1); - - if (const auto *C = dyn_cast(LHS)) - if (C->getValue() == 0xffffffff) - std::swap(LHS, RHS); - - if (const auto *C = cast(RHS)) - if (C->getValue() == 0xffffffff) { - Addr.setExtendType(AArch64_AM::UXTW); - unsigned Reg = getRegForValue(LHS); - if (!Reg) - return false; - bool RegIsKill = hasTrivialKill(LHS); - Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, - AArch64::sub_32); - Addr.setOffsetReg(Reg); - return true; - } - } + if (NumBytes != (1ULL << Val)) + break; - unsigned Reg = getRegForValue(U->getOperand(0)); - if (!Reg) - return false; - Addr.setOffsetReg(Reg); - return true; + Addr.setShift(Val); + Addr.setExtendType(AArch64_AM::LSL); + + const Value *Src = U->getOperand(0); + if (const auto *I = dyn_cast(Src)) { + if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + // Fold the zext or sext when it won't become a noop. + if (const auto *ZE = dyn_cast(I)) { + if (!isIntExtFree(ZE) && + ZE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::UXTW); + Src = ZE->getOperand(0); + } + } else if (const auto *SE = dyn_cast(I)) { + if (!isIntExtFree(SE) && + SE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::SXTW); + Src = SE->getOperand(0); + } + } + } } - break; + + if (const auto *AI = dyn_cast(Src)) + if (AI->getOpcode() == Instruction::And) { + const Value *LHS = AI->getOperand(0); + const Value *RHS = AI->getOperand(1); + + if (const auto *C = dyn_cast(LHS)) + if (C->getValue() == 0xffffffff) + std::swap(LHS, RHS); + + if (const auto *C = dyn_cast(RHS)) + if (C->getValue() == 0xffffffff) { + Addr.setExtendType(AArch64_AM::UXTW); + unsigned Reg = getRegForValue(LHS); + if (!Reg) + return false; + bool RegIsKill = hasTrivialKill(LHS); + Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, + AArch64::sub_32); + Addr.setOffsetReg(Reg); + return true; + } + } + + unsigned Reg = getRegForValue(Src); + if (!Reg) + return false; + Addr.setOffsetReg(Reg); + return true; + } case Instruction::Mul: { if (Addr.getOffsetReg()) break; @@ -661,23 +749,27 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) Addr.setShift(Val); Addr.setExtendType(AArch64_AM::LSL); - if (const auto *I = dyn_cast(LHS)) - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) - U = I; - - if (const auto *ZE = dyn_cast(U)) - if (ZE->getOperand(0)->getType()->isIntegerTy(32)) { - Addr.setExtendType(AArch64_AM::UXTW); - LHS = U->getOperand(0); - } - - if (const auto *SE = dyn_cast(U)) - if (SE->getOperand(0)->getType()->isIntegerTy(32)) { - Addr.setExtendType(AArch64_AM::SXTW); - LHS = U->getOperand(0); + const Value *Src = LHS; + if (const auto *I = dyn_cast(Src)) { + if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + // Fold the zext or sext when it won't become a noop. + if (const auto *ZE = dyn_cast(I)) { + if (!isIntExtFree(ZE) && + ZE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::UXTW); + Src = ZE->getOperand(0); + } + } else if (const auto *SE = dyn_cast(I)) { + if (!isIntExtFree(SE) && + SE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::SXTW); + Src = SE->getOperand(0); + } + } } + } - unsigned Reg = getRegForValue(LHS); + unsigned Reg = getRegForValue(Src); if (!Reg) return false; Addr.setOffsetReg(Reg); @@ -687,7 +779,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) if (Addr.getOffsetReg()) break; - if (DL.getTypeSizeInBits(Ty) != 8) + if (!Ty || DL.getTypeSizeInBits(Ty) != 8) break; const Value *LHS = U->getOperand(0); @@ -697,7 +789,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) if (C->getValue() == 0xffffffff) std::swap(LHS, RHS); - if (const auto *C = cast(RHS)) + if (const auto *C = dyn_cast(RHS)) if (C->getValue() == 0xffffffff) { Addr.setShift(0); Addr.setExtendType(AArch64_AM::LSL); @@ -714,24 +806,54 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) } break; } - } // end switch + case Instruction::SExt: + case Instruction::ZExt: { + if (!Addr.getReg() || Addr.getOffsetReg()) + break; - if (Addr.getReg()) { - if (!Addr.getOffsetReg()) { - unsigned Reg = getRegForValue(Obj); - if (!Reg) - return false; - Addr.setOffsetReg(Reg); - return true; + const Value *Src = nullptr; + // Fold the zext or sext when it won't become a noop. + if (const auto *ZE = dyn_cast(U)) { + if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::UXTW); + Src = ZE->getOperand(0); + } + } else if (const auto *SE = dyn_cast(U)) { + if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::SXTW); + Src = SE->getOperand(0); + } } - return false; + + if (!Src) + break; + + Addr.setShift(0); + unsigned Reg = getRegForValue(Src); + if (!Reg) + return false; + Addr.setOffsetReg(Reg); + return true; } + } // end switch - unsigned Reg = getRegForValue(Obj); - if (!Reg) - return false; - Addr.setReg(Reg); - return true; + if (Addr.isRegBase() && !Addr.getReg()) { + unsigned Reg = getRegForValue(Obj); + if (!Reg) + return false; + Addr.setReg(Reg); + return true; + } + + if (!Addr.getOffsetReg()) { + unsigned Reg = getRegForValue(Obj); + if (!Reg) + return false; + Addr.setOffsetReg(Reg); + return true; + } + + return false; } bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { @@ -758,13 +880,13 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. - if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } @@ -785,7 +907,7 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) @@ -832,17 +954,9 @@ bool AArch64FastISel::isValueAvailable(const Value *V) const { } bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { - unsigned ScaleFactor; - switch (VT.SimpleTy) { - default: return false; - case MVT::i1: // fall-through - case MVT::i8: ScaleFactor = 1; break; - case MVT::i16: ScaleFactor = 2; break; - case MVT::i32: // fall-through - case MVT::f32: ScaleFactor = 4; break; - case MVT::i64: // fall-through - case MVT::f64: ScaleFactor = 8; break; - } + unsigned ScaleFactor = getImplicitScaleFactor(VT); + if (!ScaleFactor) + return false; bool ImmediateOffsetNeedsLowering = false; bool RegisterOffsetNeedsLowering = false; @@ -855,9 +969,8 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // Cannot encode an offset register and an immediate offset in the same // instruction. Fold the immediate offset into the load/store instruction and - // emit an additonal add to take care of the offset register. - if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() && - Addr.getOffsetReg()) + // emit an additional add to take care of the offset register. + if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) RegisterOffsetNeedsLowering = true; // Cannot encode zero register as base. @@ -867,7 +980,8 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // If this is a stack pointer and the offset needs to be simplified then put // the alloca address into a register, set the base type back to register and // continue. This should almost never happen. - if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) { + if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) + { unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), ResultReg) @@ -918,20 +1032,10 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // reg+offset into a register. if (ImmediateOffsetNeedsLowering) { unsigned ResultReg; - if (Addr.getReg()) { + if (Addr.getReg()) // Try to fold the immediate into the add instruction. - if (Offset < 0) - ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(), - /*IsKill=*/false, -Offset); - else - ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(), - /*IsKill=*/false, Offset); - if (!ResultReg) { - unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); - ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(), - /*IsKill=*/false, ImmReg, /*IsKill=*/true); - } - } else + ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); + else ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); if (!ResultReg) @@ -954,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr, // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size // and alignment should be based on the VT. MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // Now add the rest of the operands. MIB.addFrameIndex(FI).addImm(Offset); } else { @@ -974,10 +1078,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr, MIB.addReg(Addr.getOffsetReg()); MIB.addImm(IsSigned); MIB.addImm(Addr.getShift() != 0); - } else { - MIB.addReg(Addr.getReg()); - MIB.addImm(Offset); - } + } else + MIB.addReg(Addr.getReg()).addImm(Offset); } if (MMO) @@ -1011,7 +1113,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); // Canonicalize immediates to the RHS first. - if (UseAdd && isa(LHS) && !isa(RHS)) + if (UseAdd && isa(LHS) && !isa(RHS)) std::swap(LHS, RHS); // Canonicalize mul by power of 2 to the RHS. @@ -1045,7 +1147,11 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, else ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, WantResult); - } + } else if (const auto *C = dyn_cast(RHS)) + if (C->isNullValue()) + ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, + WantResult); + if (ResultReg) return ResultReg; @@ -1072,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, } // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1087,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, - AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) { if (const auto *C = dyn_cast(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1108,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftType, ShiftVal, SetFlags, - WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } } } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1217,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrs, AArch64::SUBXrs }, { AArch64::ADDWrs, AArch64::ADDXrs } }, @@ -1254,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + if (ShiftImm >= 4) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrx, AArch64::SUBXrx }, { AArch64::ADDWrx, AArch64::ADDXrx } }, @@ -1285,7 +1405,7 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { Type *Ty = LHS->getType(); - EVT EVT = TLI.getValueType(Ty, true); + EVT EVT = TLI.getValueType(DL, Ty, true); if (!EVT.isSimple()) return false; MVT VT = EVT.getSimpleVT(); @@ -1358,6 +1478,30 @@ unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, IsZExt); } +/// \brief This method is a wrapper to simplify add emission. +/// +/// First try to emit an add with an immediate operand using emitAddSub_ri. If +/// that fails, then try to materialize the immediate into a register and use +/// emitAddSub_rr instead. +unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, + int64_t Imm) { + unsigned ResultReg; + if (Imm < 0) + ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); + else + ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); + + if (ResultReg) + return ResultReg; + + unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); + if (!CReg) + return 0; + + ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); + return ResultReg; +} + unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, bool SetFlags, bool WantResult, bool IsZExt) { return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, @@ -1412,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, return ResultReg; // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1428,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) if (const auto *C = dyn_cast(SI->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); @@ -1441,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1516,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, { AArch64::ORRWrs, AArch64::ORRXrs }, { AArch64::EORWrs, AArch64::EORXrs } }; + + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + const TargetRegisterClass *RC; unsigned Opc; switch (RetVT.SimpleTy) { @@ -1548,23 +1703,18 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); } -bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, - MachineMemOperand *MMO) { +unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, + bool WantZExt, MachineMemOperand *MMO) { + if (!TLI.allowsMisalignedMemoryAccesses(VT)) + return 0; + // Simplify this down to something we can handle. if (!simplifyAddress(Addr, VT)) - return false; + return 0; - unsigned ScaleFactor; - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type."); - case MVT::i1: // fall-through - case MVT::i8: ScaleFactor = 1; break; - case MVT::i16: ScaleFactor = 2; break; - case MVT::i32: // fall-through - case MVT::f32: ScaleFactor = 4; break; - case MVT::i64: // fall-through - case MVT::f64: ScaleFactor = 8; break; - } + unsigned ScaleFactor = getImplicitScaleFactor(VT); + if (!ScaleFactor) + llvm_unreachable("Unexpected value type."); // Negative offsets require unscaled, 9-bit, signed immediate offsets. // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. @@ -1574,20 +1724,54 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, ScaleFactor = 1; } - static const unsigned OpcTable[4][6] = { - { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi, - AArch64::LDURSi, AArch64::LDURDi }, - { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui, - AArch64::LDRSui, AArch64::LDRDui }, - { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX, - AArch64::LDRSroX, AArch64::LDRDroX }, - { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW, - AArch64::LDRSroW, AArch64::LDRDroW } + static const unsigned GPOpcTable[2][8][4] = { + // Sign-extend. + { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, + AArch64::LDURXi }, + { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, + AArch64::LDURXi }, + { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, + AArch64::LDRXui }, + { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, + AArch64::LDRXui }, + { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, + AArch64::LDRXroX }, + { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, + AArch64::LDRXroX }, + { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, + AArch64::LDRXroW }, + { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, + AArch64::LDRXroW } + }, + // Zero-extend. + { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, + AArch64::LDURXi }, + { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, + AArch64::LDURXi }, + { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, + AArch64::LDRXui }, + { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, + AArch64::LDRXui }, + { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, + AArch64::LDRXroX }, + { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, + AArch64::LDRXroX }, + { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, + AArch64::LDRXroW }, + { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, + AArch64::LDRXroW } + } + }; + + static const unsigned FPOpcTable[4][2] = { + { AArch64::LDURSi, AArch64::LDURDi }, + { AArch64::LDRSui, AArch64::LDRDui }, + { AArch64::LDRSroX, AArch64::LDRDroX }, + { AArch64::LDRSroW, AArch64::LDRDroW } }; unsigned Opc; const TargetRegisterClass *RC; - bool VTIsi1 = false; bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && Addr.getOffsetReg(); unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; @@ -1595,30 +1779,65 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, Addr.getExtendType() == AArch64_AM::SXTW) Idx++; + bool IsRet64Bit = RetVT == MVT::i64; switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type."); - case MVT::i1: VTIsi1 = true; // Intentional fall-through. - case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break; - case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break; - case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break; - case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break; - case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break; - case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break; + default: + llvm_unreachable("Unexpected value type."); + case MVT::i1: // Intentional fall-through. + case MVT::i8: + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; + RC = (IsRet64Bit && !WantZExt) ? + &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; + break; + case MVT::i16: + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; + RC = (IsRet64Bit && !WantZExt) ? + &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; + break; + case MVT::i32: + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; + RC = (IsRet64Bit && !WantZExt) ? + &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; + break; + case MVT::i64: + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; + RC = &AArch64::GPR64RegClass; + break; + case MVT::f32: + Opc = FPOpcTable[Idx][0]; + RC = &AArch64::FPR32RegClass; + break; + case MVT::f64: + Opc = FPOpcTable[Idx][1]; + RC = &AArch64::FPR64RegClass; + break; } // Create the base instruction, then add the operands. - ResultReg = createResultReg(RC); + unsigned ResultReg = createResultReg(RC); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); // Loading an i1 requires special handling. - if (VTIsi1) { + if (VT == MVT::i1) { unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); assert(ANDReg && "Unexpected AND instruction emission failure."); ResultReg = ANDReg; } - return true; + + // For zero-extending loads to 64bit we emit a 32bit load and then convert + // the 32bit reg to a 64bit reg. + if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { + unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBREG_TO_REG), Reg64) + .addImm(0) + .addReg(ResultReg, getKillRegState(true)) + .addImm(AArch64::sub_32); + ResultReg = Reg64; + } + return ResultReg; } bool AArch64FastISel::selectAddSub(const Instruction *I) { @@ -1690,31 +1909,99 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { if (!computeAddress(I->getOperand(0), Addr, I->getType())) return false; - unsigned ResultReg; - if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I))) + // Fold the following sign-/zero-extend into the load instruction. + bool WantZExt = true; + MVT RetVT = VT; + const Value *IntExtVal = nullptr; + if (I->hasOneUse()) { + if (const auto *ZE = dyn_cast(I->use_begin()->getUser())) { + if (isTypeSupported(ZE->getType(), RetVT)) + IntExtVal = ZE; + else + RetVT = VT; + } else if (const auto *SE = dyn_cast(I->use_begin()->getUser())) { + if (isTypeSupported(SE->getType(), RetVT)) + IntExtVal = SE; + else + RetVT = VT; + WantZExt = false; + } + } + + unsigned ResultReg = + emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); + if (!ResultReg) return false; + // There are a few different cases we have to handle, because the load or the + // sign-/zero-extend might not be selected by FastISel if we fall-back to + // SelectionDAG. There is also an ordering issue when both instructions are in + // different basic blocks. + // 1.) The load instruction is selected by FastISel, but the integer extend + // not. This usually happens when the integer extend is in a different + // basic block and SelectionDAG took over for that basic block. + // 2.) The load instruction is selected before the integer extend. This only + // happens when the integer extend is in a different basic block. + // 3.) The load instruction is selected by SelectionDAG and the integer extend + // by FastISel. This happens if there are instructions between the load + // and the integer extend that couldn't be selected by FastISel. + if (IntExtVal) { + // The integer extend hasn't been emitted yet. FastISel or SelectionDAG + // could select it. Emit a copy to subreg if necessary. FastISel will remove + // it when it selects the integer extend. + unsigned Reg = lookUpRegForValue(IntExtVal); + auto *MI = MRI.getUniqueVRegDef(Reg); + if (!MI) { + if (RetVT == MVT::i64 && VT <= MVT::i32) { + if (WantZExt) { + // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). + std::prev(FuncInfo.InsertPt)->eraseFromParent(); + ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg(); + } else + ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, + /*IsKill=*/true, + AArch64::sub_32); + } + updateValueMap(I, ResultReg); + return true; + } + + // The integer extend has already been emitted - delete all the instructions + // that have been emitted by the integer extend lowering code and use the + // result from the load instruction directly. + while (MI) { + Reg = 0; + for (auto &Opnd : MI->uses()) { + if (Opnd.isReg()) { + Reg = Opnd.getReg(); + break; + } + } + MI->eraseFromParent(); + MI = nullptr; + if (Reg) + MI = MRI.getUniqueVRegDef(Reg); + } + updateValueMap(IntExtVal, ResultReg); + return true; + } + updateValueMap(I, ResultReg); return true; } bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO) { + if (!TLI.allowsMisalignedMemoryAccesses(VT)) + return false; + // Simplify this down to something we can handle. if (!simplifyAddress(Addr, VT)) return false; - unsigned ScaleFactor; - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type."); - case MVT::i1: // fall-through - case MVT::i8: ScaleFactor = 1; break; - case MVT::i16: ScaleFactor = 2; break; - case MVT::i32: // fall-through - case MVT::f32: ScaleFactor = 4; break; - case MVT::i64: // fall-through - case MVT::f64: ScaleFactor = 8; break; - } + unsigned ScaleFactor = getImplicitScaleFactor(VT); + if (!ScaleFactor) + llvm_unreachable("Unexpected value type."); // Negative offsets require unscaled, 9-bit, signed immediate offsets. // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. @@ -1724,7 +2011,6 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, ScaleFactor = 1; } - static const unsigned OpcTable[4][6] = { { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, AArch64::STURSi, AArch64::STURDi }, @@ -1734,7 +2020,6 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, AArch64::STRSroX, AArch64::STRDroX }, { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, AArch64::STRSroW, AArch64::STRDroW } - }; unsigned Opc; @@ -1858,30 +2143,125 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { } } -/// \brief Check if the comparison against zero and the following branch can be -/// folded into a single instruction (CBZ or CBNZ). -static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) { - CmpInst::Predicate Predicate = CI->getPredicate(); - if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE)) +/// \brief Try to emit a combined compare-and-branch instruction. +bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { + assert(isa(BI->getCondition()) && "Expected cmp instruction"); + const CmpInst *CI = cast(BI->getCondition()); + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + MVT VT; + if (!isTypeSupported(LHS->getType(), VT)) + return false; + + unsigned BW = VT.getSizeInBits(); + if (BW > 64) + return false; + + MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; + + // Try to take advantage of fallthrough opportunities. + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } + + int TestBit = -1; + bool IsCmpNE; + switch (Predicate) { + default: return false; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + if (isa(LHS) && cast(LHS)->isNullValue()) + std::swap(LHS, RHS); + + if (!isa(RHS) || !cast(RHS)->isNullValue()) + return false; + + if (const auto *AI = dyn_cast(LHS)) + if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { + const Value *AndLHS = AI->getOperand(0); + const Value *AndRHS = AI->getOperand(1); + + if (const auto *C = dyn_cast(AndLHS)) + if (C->getValue().isPowerOf2()) + std::swap(AndLHS, AndRHS); + + if (const auto *C = dyn_cast(AndRHS)) + if (C->getValue().isPowerOf2()) { + TestBit = C->getValue().logBase2(); + LHS = AndLHS; + } + } + + if (VT == MVT::i1) + TestBit = 0; + + IsCmpNE = Predicate == CmpInst::ICMP_NE; + break; + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SGE: + if (!isa(RHS) || !cast(RHS)->isNullValue()) + return false; + + TestBit = BW - 1; + IsCmpNE = Predicate == CmpInst::ICMP_SLT; + break; + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_SLE: + if (!isa(RHS)) + return false; + + if (cast(RHS)->getValue() != APInt(BW, -1, true)) + return false; + + TestBit = BW - 1; + IsCmpNE = Predicate == CmpInst::ICMP_SLE; + break; + } // end switch + + static const unsigned OpcTable[2][2][2] = { + { {AArch64::CBZW, AArch64::CBZX }, + {AArch64::CBNZW, AArch64::CBNZX} }, + { {AArch64::TBZW, AArch64::TBZX }, + {AArch64::TBNZW, AArch64::TBNZX} } + }; + + bool IsBitTest = TestBit != -1; + bool Is64Bit = BW == 64; + if (TestBit < 32 && TestBit >= 0) + Is64Bit = false; + + unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; + const MCInstrDesc &II = TII.get(Opc); - Type *Ty = CI->getOperand(0)->getType(); - if (!Ty->isIntegerTy()) + unsigned SrcReg = getRegForValue(LHS); + if (!SrcReg) return false; + bool SrcIsKill = hasTrivialKill(LHS); - unsigned BW = cast(Ty)->getBitWidth(); - if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64) - return false; + if (BW == 64 && !Is64Bit) + SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, + AArch64::sub_32); - if (const auto *C = dyn_cast(CI->getOperand(0))) - if (C->isNullValue()) - return true; + if ((BW < 32) && !IsBitTest) + SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); - if (const auto *C = dyn_cast(CI->getOperand(1))) - if (C->isNullValue()) - return true; + // Emit the combined compare and branch instruction. + SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + if (IsBitTest) + MIB.addImm(TestBit); + MIB.addMBB(TBB); - return false; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; } bool AArch64FastISel::selectBranch(const Instruction *I) { @@ -1911,74 +2291,16 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return true; } + // Try to emit a combined compare-and-branch first. + if (emitCompareAndBranch(BI)) + return true; + // Try to take advantage of fallthrough opportunities. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); Predicate = CmpInst::getInversePredicate(Predicate); } - // Try to optimize comparisons against zero. - if (canFoldZeroCheckIntoBranch(CI)) { - const Value *LHS = CI->getOperand(0); - const Value *RHS = CI->getOperand(1); - - // Canonicalize zero values to the RHS. - if (const auto *C = dyn_cast(LHS)) - if (C->isNullValue()) - std::swap(LHS, RHS); - - int TestBit = -1; - if (const auto *AI = dyn_cast(LHS)) - if (AI->getOpcode() == Instruction::And) { - const Value *AndLHS = AI->getOperand(0); - const Value *AndRHS = AI->getOperand(1); - - if (const auto *C = dyn_cast(AndLHS)) - if (C->getValue().isPowerOf2()) - std::swap(AndLHS, AndRHS); - - if (const auto *C = dyn_cast(AndRHS)) - if (C->getValue().isPowerOf2()) { - TestBit = C->getValue().logBase2(); - LHS = AndLHS; - } - } - - static const unsigned OpcTable[2][2][2] = { - { {AArch64::CBZW, AArch64::CBZX }, - {AArch64::CBNZW, AArch64::CBNZX} }, - { {AArch64::TBZW, AArch64::TBZX }, - {AArch64::TBNZW, AArch64::TBNZX} } - }; - bool IsBitTest = TestBit != -1; - bool IsCmpNE = Predicate == CmpInst::ICMP_NE; - bool Is64Bit = LHS->getType()->isIntegerTy(64); - unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; - - unsigned SrcReg = getRegForValue(LHS); - if (!SrcReg) - return false; - bool SrcIsKill = hasTrivialKill(LHS); - - // Emit the combined compare and branch instruction. - MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - if (IsBitTest) - MIB.addImm(TestBit); - MIB.addMBB(TBB); - - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; - } - // Emit the cmp. if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; @@ -2013,14 +2335,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { @@ -2051,14 +2366,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const auto *CI = dyn_cast(BI->getCondition())) { @@ -2068,11 +2376,12 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addMBB(Target); // Obtain the branch weight and add the target to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - Target->getBasicBlock()); - FuncInfo.MBB->addSuccessor(Target, BranchWeight); + if (FuncInfo.BPI) { + uint32_t BranchWeight = + FuncInfo.BPI->getEdgeWeight(BI->getParent(), Target->getBasicBlock()); + FuncInfo.MBB->addSuccessor(Target, BranchWeight); + } else + FuncInfo.MBB->addSuccessorWithoutWeight(Target); return true; } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { // Fake request the condition, otherwise the intrinsic might be completely @@ -2086,14 +2395,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2109,7 +2411,11 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // Regardless, the compare has been done in the predecessor block, // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. - emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); + // + // FIXME: Optimize this with TBZW/TBZNW. + unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1); + assert(ANDReg && "Unexpected AND instruction emission failure."); + emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); @@ -2120,14 +2426,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2143,8 +2442,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); + for (auto *Succ : BI->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); return true; } @@ -2229,60 +2528,189 @@ bool AArch64FastISel::selectCmp(const Instruction *I) { return true; } -bool AArch64FastISel::selectSelect(const Instruction *I) { - const SelectInst *SI = cast(I); +/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false' +/// value. +bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { + if (!SI->getType()->isIntegerTy(1)) + return false; - EVT DestEVT = TLI.getValueType(SI->getType(), true); - if (!DestEVT.isSimple()) + const Value *Src1Val, *Src2Val; + unsigned Opc = 0; + bool NeedExtraOp = false; + if (auto *CI = dyn_cast(SI->getTrueValue())) { + if (CI->isOne()) { + Src1Val = SI->getCondition(); + Src2Val = SI->getFalseValue(); + Opc = AArch64::ORRWrr; + } else { + assert(CI->isZero()); + Src1Val = SI->getFalseValue(); + Src2Val = SI->getCondition(); + Opc = AArch64::BICWrr; + } + } else if (auto *CI = dyn_cast(SI->getFalseValue())) { + if (CI->isOne()) { + Src1Val = SI->getCondition(); + Src2Val = SI->getTrueValue(); + Opc = AArch64::ORRWrr; + NeedExtraOp = true; + } else { + assert(CI->isZero()); + Src1Val = SI->getCondition(); + Src2Val = SI->getTrueValue(); + Opc = AArch64::ANDWrr; + } + } + + if (!Opc) return false; - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 && - DestVT != MVT::f64) + unsigned Src1Reg = getRegForValue(Src1Val); + if (!Src1Reg) return false; + bool Src1IsKill = hasTrivialKill(Src1Val); - unsigned SelectOpc; - const TargetRegisterClass *RC = nullptr; - switch (DestVT.SimpleTy) { - default: return false; + unsigned Src2Reg = getRegForValue(Src2Val); + if (!Src2Reg) + return false; + bool Src2IsKill = hasTrivialKill(Src2Val); + + if (NeedExtraOp) { + Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); + Src1IsKill = true; + } + unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, + Src1IsKill, Src2Reg, Src2IsKill); + updateValueMap(SI, ResultReg); + return true; +} + +bool AArch64FastISel::selectSelect(const Instruction *I) { + assert(isa(I) && "Expected a select instruction."); + MVT VT; + if (!isTypeSupported(I->getType(), VT)) + return false; + + unsigned Opc; + const TargetRegisterClass *RC; + switch (VT.SimpleTy) { + default: + return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: case MVT::i32: - SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break; + Opc = AArch64::CSELWr; + RC = &AArch64::GPR32RegClass; + break; case MVT::i64: - SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break; + Opc = AArch64::CSELXr; + RC = &AArch64::GPR64RegClass; + break; case MVT::f32: - SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break; + Opc = AArch64::FCSELSrrr; + RC = &AArch64::FPR32RegClass; + break; case MVT::f64: - SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break; + Opc = AArch64::FCSELDrrr; + RC = &AArch64::FPR64RegClass; + break; } + const SelectInst *SI = cast(I); const Value *Cond = SI->getCondition(); - bool NeedTest = true; AArch64CC::CondCode CC = AArch64CC::NE; - if (foldXALUIntrinsic(CC, I, Cond)) - NeedTest = false; + AArch64CC::CondCode ExtraCC = AArch64CC::AL; - unsigned CondReg = getRegForValue(Cond); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(Cond); + if (optimizeSelect(SI)) + return true; - if (NeedTest) { - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); + // Try to pickup the flags, so we don't have to emit another compare. + if (foldXALUIntrinsic(CC, I, Cond)) { + // Fake request the condition to force emission of the XALU intrinsic. + unsigned CondReg = getRegForValue(Cond); + if (!CondReg) + return false; + } else if (isa(Cond) && cast(Cond)->hasOneUse() && + isValueAvailable(Cond)) { + const auto *Cmp = cast(Cond); + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); + const Value *FoldSelect = nullptr; + switch (Predicate) { + default: + break; + case CmpInst::FCMP_FALSE: + FoldSelect = SI->getFalseValue(); + break; + case CmpInst::FCMP_TRUE: + FoldSelect = SI->getTrueValue(); + break; + } + + if (FoldSelect) { + unsigned SrcReg = getRegForValue(FoldSelect); + if (!SrcReg) + return false; + unsigned UseReg = lookUpRegForValue(SI); + if (UseReg) + MRI.clearKillFlags(UseReg); + + updateValueMap(I, SrcReg); + return true; + } + + // Emit the cmp. + if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) + return false; + + // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. + CC = getCompareCC(Predicate); + switch (Predicate) { + default: + break; + case CmpInst::FCMP_UEQ: + ExtraCC = AArch64CC::EQ; + CC = AArch64CC::VS; + break; + case CmpInst::FCMP_ONE: + ExtraCC = AArch64CC::MI; + CC = AArch64CC::GT; + break; + } + assert((CC != AArch64CC::AL) && "Unexpected condition code."); + } else { + unsigned CondReg = getRegForValue(Cond); + if (!CondReg) + return false; + bool CondIsKill = hasTrivialKill(Cond); + + const MCInstrDesc &II = TII.get(AArch64::ANDSWri); + CondReg = constrainOperandRegClass(II, CondReg, 1); + + // Emit a TST instruction (ANDS wzr, reg, #imm). + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, + AArch64::WZR) + .addReg(CondReg, getKillRegState(CondIsKill)) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); } - unsigned TrueReg = getRegForValue(SI->getTrueValue()); - bool TrueIsKill = hasTrivialKill(SI->getTrueValue()); + unsigned Src1Reg = getRegForValue(SI->getTrueValue()); + bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); - unsigned FalseReg = getRegForValue(SI->getFalseValue()); - bool FalseIsKill = hasTrivialKill(SI->getFalseValue()); + unsigned Src2Reg = getRegForValue(SI->getFalseValue()); + bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); - if (!TrueReg || !FalseReg) + if (!Src1Reg || !Src2Reg) return false; - unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill, - FalseReg, FalseIsKill, CC); + if (ExtraCC != AArch64CC::AL) { + Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, + Src2IsKill, ExtraCC); + Src2IsKill = true; + } + unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, + Src2IsKill, CC); updateValueMap(I, ResultReg); return true; } @@ -2329,7 +2757,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { if (SrcReg == 0) return false; - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); if (SrcVT == MVT::f128) return false; @@ -2365,7 +2793,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { return false; bool SrcIsKill = hasTrivialKill(I->getOperand(0)); - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { @@ -2424,7 +2852,7 @@ bool AArch64FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; @@ -2466,7 +2894,7 @@ bool AArch64FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); unsigned SrcReg; const TargetRegisterClass *RC; if (VT >= MVT::i1 && VT <= MVT::i32) { @@ -2521,8 +2949,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, .addImm(NumBytes); // Process the args. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; + for (CCValAssign &VA : ArgLocs) { const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; @@ -2585,8 +3012,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), - MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; @@ -2616,6 +3043,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); + + // TODO: Handle big-endian results + if (CopyVT.isVector() && !Subtarget->isLittleEndian()) + return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) @@ -2634,9 +3066,9 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + MCSymbol *Symbol = CLI.Symbol; - if (!Callee && !SymName) + if (!Callee && !Symbol) return false; // Allow SelectionDAG isel to handle tail calls. @@ -2698,8 +3130,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (CM == CodeModel::Small) { const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); - if (SymName) - MIB.addExternalSymbol(SymName, 0); + if (Symbol) + MIB.addSym(Symbol, 0); else if (Addr.getGlobalValue()) MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); else if (Addr.getReg()) { @@ -2709,18 +3141,18 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { return false; } else { unsigned CallReg = 0; - if (SymName) { + if (Symbol) { unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE); + .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); CallReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), - CallReg) - .addReg(ADRPReg) - .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::LDRXui), CallReg) + .addReg(ADRPReg) + .addSym(Symbol, + AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else if (Addr.getGlobalValue()) CallReg = materializeGV(Addr.getGlobalValue()); else if (Addr.getReg()) @@ -2740,7 +3172,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; @@ -2788,14 +3220,11 @@ bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, } } - bool RV; - unsigned ResultReg; - RV = emitLoad(VT, ResultReg, Src); - if (!RV) + unsigned ResultReg = emitLoad(VT, VT, Src); + if (!ResultReg) return false; - RV = emitStore(VT, ResultReg, Dest); - if (!RV) + if (!emitStore(VT, ResultReg, Dest)) return false; int64_t Size = VT.getSizeInBits() / 8; @@ -2833,15 +3262,49 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, if (RetVT != MVT::i32 && RetVT != MVT::i64) return false; + const Value *LHS = II->getArgOperand(0); + const Value *RHS = II->getArgOperand(1); + + // Canonicalize immediate to the RHS. + if (isa(LHS) && !isa(RHS) && + isCommutativeIntrinsic(II)) + std::swap(LHS, RHS); + + // Simplify multiplies. + Intrinsic::ID IID = II->getIntrinsicID(); + switch (IID) { + default: + break; + case Intrinsic::smul_with_overflow: + if (const auto *C = dyn_cast(RHS)) + if (C->getValue() == 2) + IID = Intrinsic::sadd_with_overflow; + break; + case Intrinsic::umul_with_overflow: + if (const auto *C = dyn_cast(RHS)) + if (C->getValue() == 2) + IID = Intrinsic::uadd_with_overflow; + break; + } + AArch64CC::CondCode TmpCC; - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::sadd_with_overflow: - case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break; - case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break; - case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break; - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break; + switch (IID) { + default: + return false; + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + TmpCC = AArch64CC::VS; + break; + case Intrinsic::uadd_with_overflow: + TmpCC = AArch64CC::HS; + break; + case Intrinsic::usub_with_overflow: + TmpCC = AArch64CC::LO; + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + TmpCC = AArch64CC::NE; + break; } // Check if both instructions are in the same basic block. @@ -2849,8 +3312,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -2876,8 +3339,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { MFI->setFrameAddressIsTaken(true); const AArch64RegisterInfo *RegInfo = - static_cast( - TM.getSubtargetImpl()->getRegisterInfo()); + static_cast(Subtarget->getRegisterInfo()); unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -2994,13 +3456,40 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } CallLoweringInfo CLI; - CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(), + MCContext &Ctx = MF->getContext(); + CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), TLI.getLibcallName(LC), std::move(Args)); if (!lowerCallTo(CLI)) return false; updateValueMap(II, CLI.ResultReg); return true; } + case Intrinsic::fabs: { + MVT VT; + if (!isTypeLegal(II->getType(), VT)) + return false; + + unsigned Opc; + switch (VT.SimpleTy) { + default: + return false; + case MVT::f32: + Opc = AArch64::FABSSr; + break; + case MVT::f64: + Opc = AArch64::FABSDr; + break; + } + unsigned SrcReg = getRegForValue(II->getOperand(0)); + if (!SrcReg) + return false; + bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(SrcReg, getKillRegState(SrcRegIsKill)); + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::trap: { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) .addImm(1); @@ -3051,7 +3540,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { std::swap(LHS, RHS); // Simplify multiplies. - unsigned IID = II->getIntrinsicID(); + Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { default: break; @@ -3115,7 +3604,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { AArch64_AM::ASR, 31, /*WantResult=*/false); } else { assert(VT == MVT::i64 && "Unexpected value type."); - MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); + // LHSReg and RHSReg cannot be killed by this Mul, since they are + // reused in the next instruction. + MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, + /*IsKill=*/false); unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, RHSReg, RHSIsKill); emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, @@ -3144,7 +3636,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { AArch64::sub_32); } else { assert(VT == MVT::i64 && "Unexpected value type."); - MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill); + // LHSReg and RHSReg cannot be killed by this Mul, since they are + // reused in the next instruction. + MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, + /*IsKill=*/false); unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, RHSReg, RHSIsKill); emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, @@ -3163,6 +3658,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, AArch64::WZR, /*IsKill=*/true, AArch64::WZR, /*IsKill=*/true, getInvertedCondCode(CC)); + (void)ResultReg2; assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); updateValueMap(II, ResultReg1, 2); @@ -3188,7 +3684,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; @@ -3223,7 +3719,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; @@ -3261,8 +3757,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned RetReg : RetRegs) + MIB.addReg(RetReg, RegState::Implicit); return true; } @@ -3271,8 +3767,8 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); Type *SrcTy = Op->getType(); - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); + EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); + EVT DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -3433,11 +3929,12 @@ unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, bool Op0IsKill, uint64_t Shift, - bool IsZext) { + bool IsZExt) { assert(RetVT.SimpleTy >= SrcVT.SimpleTy && "Unexpected source/return type pair."); - assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 || - SrcVT == MVT::i64) && "Unexpected source value type."); + assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || + SrcVT == MVT::i32 || SrcVT == MVT::i64) && + "Unexpected source value type."); assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || RetVT == MVT::i64) && "Unexpected return value type."); @@ -3445,6 +3942,20 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, unsigned RegSize = Is64Bit ? 64 : 32; unsigned DstBits = RetVT.getSizeInBits(); unsigned SrcBits = SrcVT.getSizeInBits(); + const TargetRegisterClass *RC = + Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + + // Just emit a copy for "zero" shifts. + if (Shift == 0) { + if (RetVT == SrcVT) { + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)); + return ResultReg; + } else + return emitIntExt(SrcVT, Op0, RetVT, IsZExt); + } // Don't deal with undefined shifts. if (Shift >= DstBits) @@ -3482,9 +3993,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, {AArch64::SBFMWri, AArch64::SBFMXri}, {AArch64::UBFMWri, AArch64::UBFMXri} }; - unsigned Opc = OpcTable[IsZext][Is64Bit]; - const TargetRegisterClass *RC = - Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { unsigned TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -3530,8 +4039,9 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, bool IsZExt) { assert(RetVT.SimpleTy >= SrcVT.SimpleTy && "Unexpected source/return type pair."); - assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 || - SrcVT == MVT::i64) && "Unexpected source value type."); + assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || + SrcVT == MVT::i32 || SrcVT == MVT::i64) && + "Unexpected source value type."); assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || RetVT == MVT::i64) && "Unexpected return value type."); @@ -3539,6 +4049,20 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, unsigned RegSize = Is64Bit ? 64 : 32; unsigned DstBits = RetVT.getSizeInBits(); unsigned SrcBits = SrcVT.getSizeInBits(); + const TargetRegisterClass *RC = + Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + + // Just emit a copy for "zero" shifts. + if (Shift == 0) { + if (RetVT == SrcVT) { + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)); + return ResultReg; + } else + return emitIntExt(SrcVT, Op0, RetVT, IsZExt); + } // Don't deal with undefined shifts. if (Shift >= DstBits) @@ -3591,8 +4115,6 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, {AArch64::UBFMWri, AArch64::UBFMXri} }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; - const TargetRegisterClass *RC = - Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { unsigned TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -3638,8 +4160,9 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, bool IsZExt) { assert(RetVT.SimpleTy >= SrcVT.SimpleTy && "Unexpected source/return type pair."); - assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 || - SrcVT == MVT::i64) && "Unexpected source value type."); + assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || + SrcVT == MVT::i32 || SrcVT == MVT::i64) && + "Unexpected source value type."); assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || RetVT == MVT::i64) && "Unexpected return value type."); @@ -3647,6 +4170,20 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, unsigned RegSize = Is64Bit ? 64 : 32; unsigned DstBits = RetVT.getSizeInBits(); unsigned SrcBits = SrcVT.getSizeInBits(); + const TargetRegisterClass *RC = + Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + + // Just emit a copy for "zero" shifts. + if (Shift == 0) { + if (RetVT == SrcVT) { + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)); + return ResultReg; + } else + return emitIntExt(SrcVT, Op0, RetVT, IsZExt); + } // Don't deal with undefined shifts. if (Shift >= DstBits) @@ -3687,8 +4224,6 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, {AArch64::UBFMWri, AArch64::UBFMXri} }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; - const TargetRegisterClass *RC = - Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { unsigned TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -3763,48 +4298,154 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); } -bool AArch64FastISel::selectIntExt(const Instruction *I) { - // On ARM, in general, integer casts don't involve legal types; this code - // handles promotable integers. The high bits for a type smaller than - // the register size are assumed to be undefined. - Type *DestTy = I->getType(); - Value *Src = I->getOperand(0); - Type *SrcTy = Src->getType(); +static bool isZExtLoad(const MachineInstr *LI) { + switch (LI->getOpcode()) { + default: + return false; + case AArch64::LDURBBi: + case AArch64::LDURHHi: + case AArch64::LDURWi: + case AArch64::LDRBBui: + case AArch64::LDRHHui: + case AArch64::LDRWui: + case AArch64::LDRBBroX: + case AArch64::LDRHHroX: + case AArch64::LDRWroX: + case AArch64::LDRBBroW: + case AArch64::LDRHHroW: + case AArch64::LDRWroW: + return true; + } +} - unsigned SrcReg = getRegForValue(Src); - if (!SrcReg) +static bool isSExtLoad(const MachineInstr *LI) { + switch (LI->getOpcode()) { + default: return false; + case AArch64::LDURSBWi: + case AArch64::LDURSHWi: + case AArch64::LDURSBXi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::LDRSBWui: + case AArch64::LDRSHWui: + case AArch64::LDRSBXui: + case AArch64::LDRSHXui: + case AArch64::LDRSWui: + case AArch64::LDRSBWroX: + case AArch64::LDRSHWroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroX: + case AArch64::LDRSBWroW: + case AArch64::LDRSHWroW: + case AArch64::LDRSBXroW: + case AArch64::LDRSHXroW: + case AArch64::LDRSWroW: + return true; + } +} - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); - if (!SrcEVT.isSimple()) +bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, + MVT SrcVT) { + const auto *LI = dyn_cast(I->getOperand(0)); + if (!LI || !LI->hasOneUse()) return false; - if (!DestEVT.isSimple()) + + // Check if the load instruction has already been selected. + unsigned Reg = lookUpRegForValue(LI); + if (!Reg) return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - MVT DestVT = DestEVT.getSimpleVT(); - unsigned ResultReg = 0; + MachineInstr *MI = MRI.getUniqueVRegDef(Reg); + if (!MI) + return false; + + // Check if the correct load instruction has been emitted - SelectionDAG might + // have emitted a zero-extending load, but we need a sign-extending load. + bool IsZExt = isa(I); + const auto *LoadMI = MI; + if (LoadMI->getOpcode() == TargetOpcode::COPY && + LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { + unsigned LoadReg = MI->getOperand(1).getReg(); + LoadMI = MRI.getUniqueVRegDef(LoadReg); + assert(LoadMI && "Expected valid instruction"); + } + if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) + return false; + + // Nothing to be done. + if (RetVT != MVT::i64 || SrcVT > MVT::i32) { + updateValueMap(I, Reg); + return true; + } + + if (IsZExt) { + unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBREG_TO_REG), Reg64) + .addImm(0) + .addReg(Reg, getKillRegState(true)) + .addImm(AArch64::sub_32); + Reg = Reg64; + } else { + assert((MI->getOpcode() == TargetOpcode::COPY && + MI->getOperand(1).getSubReg() == AArch64::sub_32) && + "Expected copy instruction"); + Reg = MI->getOperand(1).getReg(); + MI->eraseFromParent(); + } + updateValueMap(I, Reg); + return true; +} + +bool AArch64FastISel::selectIntExt(const Instruction *I) { + assert((isa(I) || isa(I)) && + "Unexpected integer extend instruction."); + MVT RetVT; + MVT SrcVT; + if (!isTypeSupported(I->getType(), RetVT)) + return false; + + if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) + return false; + + // Try to optimize already sign-/zero-extended values from load instructions. + if (optimizeIntExtLoad(I, RetVT, SrcVT)) + return true; + + unsigned SrcReg = getRegForValue(I->getOperand(0)); + if (!SrcReg) + return false; + bool SrcIsKill = hasTrivialKill(I->getOperand(0)); + // Try to optimize already sign-/zero-extended values from function arguments. bool IsZExt = isa(I); - // Check if it is an argument and if it is already zero/sign-extended. - if (const auto *Arg = dyn_cast(Src)) { + if (const auto *Arg = dyn_cast(I->getOperand(0))) { if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { - if (DestVT == MVT::i64) { - ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); + if (RetVT == MVT::i64 && SrcVT != MVT::i64) { + unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), ResultReg) - .addImm(0) - .addReg(SrcReg) - .addImm(AArch64::sub_32); - } else - ResultReg = SrcReg; + .addImm(0) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(AArch64::sub_32); + SrcReg = ResultReg; + } + // Conservatively clear all kill flags from all uses, because we are + // replacing a sign-/zero-extend instruction at IR level with a nop at MI + // level. The result of the instruction at IR level might have been + // trivially dead, which is now not longer true. + unsigned UseReg = lookUpRegForValue(I); + if (UseReg) + MRI.clearKillFlags(UseReg); + + updateValueMap(I, SrcReg); + return true; } } - if (!ResultReg) - ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt); - + unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); if (!ResultReg) return false; @@ -3813,7 +4454,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) { } bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); + EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -3879,18 +4520,22 @@ bool AArch64FastISel::selectMul(const Instruction *I) { MVT SrcVT = VT; bool IsZExt = true; if (const auto *ZExt = dyn_cast(Src0)) { - MVT VT; - if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { - SrcVT = VT; - IsZExt = true; - Src0 = ZExt->getOperand(0); + if (!isIntExtFree(ZExt)) { + MVT VT; + if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { + SrcVT = VT; + IsZExt = true; + Src0 = ZExt->getOperand(0); + } } } else if (const auto *SExt = dyn_cast(Src0)) { - MVT VT; - if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { - SrcVT = VT; - IsZExt = false; - Src0 = SExt->getOperand(0); + if (!isIntExtFree(SExt)) { + MVT VT; + if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { + SrcVT = VT; + IsZExt = false; + Src0 = SExt->getOperand(0); + } } } @@ -3939,21 +4584,25 @@ bool AArch64FastISel::selectShift(const Instruction *I) { unsigned ResultReg = 0; uint64_t ShiftVal = C->getZExtValue(); MVT SrcVT = RetVT; - bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true; + bool IsZExt = I->getOpcode() != Instruction::AShr; const Value *Op0 = I->getOperand(0); if (const auto *ZExt = dyn_cast(Op0)) { - MVT TmpVT; - if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { - SrcVT = TmpVT; - IsZExt = true; - Op0 = ZExt->getOperand(0); + if (!isIntExtFree(ZExt)) { + MVT TmpVT; + if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { + SrcVT = TmpVT; + IsZExt = true; + Op0 = ZExt->getOperand(0); + } } } else if (const auto *SExt = dyn_cast(Op0)) { - MVT TmpVT; - if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { - SrcVT = TmpVT; - IsZExt = false; - Op0 = SExt->getOperand(0); + if (!isIntExtFree(SExt)) { + MVT TmpVT; + if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { + SrcVT = TmpVT; + IsZExt = false; + Op0 = SExt->getOperand(0); + } } } @@ -4082,7 +4731,8 @@ bool AArch64FastISel::selectFRem(const Instruction *I) { } CallLoweringInfo CLI; - CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(), + MCContext &Ctx = MF->getContext(); + CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), TLI.getLibcallName(LC), std::move(Args)); if (!lowerCallTo(CLI)) return false; @@ -4117,9 +4767,8 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) { return true; } - unsigned Pow2MinusOne = (1 << Lg2) - 1; - unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg, - /*IsKill=*/false, Pow2MinusOne); + int64_t Pow2MinusOne = (1ULL << Lg2) - 1; + unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); if (!AddReg) return false; @@ -4159,6 +4808,101 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) { return true; } +/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We +/// have to duplicate it for AArch64, because otherwise we would fail during the +/// sign-extend emission. +std::pair AArch64FastISel::getRegForGEPIndex(const Value *Idx) { + unsigned IdxN = getRegForValue(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return std::pair(0, false); + + bool IdxNIsKill = hasTrivialKill(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend it. + MVT PtrVT = TLI.getPointerTy(DL); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) { + IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); + IdxNIsKill = true; + } else if (IdxVT.bitsGT(PtrVT)) + llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); + return std::pair(IdxN, IdxNIsKill); +} + +/// This is mostly a copy of the existing FastISel GEP code, but we have to +/// duplicate it for AArch64, because otherwise we would bail out even for +/// simple cases. This is because the standard fastEmit functions don't cover +/// MUL at all and ADD is lowered very inefficientily. +bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (!N) + return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + Type *Ty = I->getOperand(0)->getType(); + MVT VT = TLI.getPointerTy(DL); + for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; + if (auto *StTy = dyn_cast(Ty)) { + unsigned Field = cast(Idx)->getZExtValue(); + // N = N + Offset + if (Field) + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); + Ty = StTy->getElementType(Field); + } else { + Ty = cast(Ty)->getElementType(); + // If this is a constant subscript, handle it quickly. + if (const auto *CI = dyn_cast(Idx)) { + if (CI->isZero()) + continue; + // N = N + Offset + TotalOffs += + DL.getTypeAllocSize(Ty) * cast(CI)->getSExtValue(); + continue; + } + if (TotalOffs) { + N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); + if (!N) + return false; + NIsKill = true; + TotalOffs = 0; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + std::pair Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; + if (!IdxN) + return false; + + if (ElementSize != 1) { + unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); + if (!C) + return false; + IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); + if (!IdxN) + return false; + IdxNIsKill = true; + } + N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (!N) + return false; + } + } + if (TotalOffs) { + N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); + if (!N) + return false; + } + updateValueMap(I, N); + return true; +} + bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: @@ -4201,13 +4945,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { case Instruction::FPToUI: return selectFPToInt(I, /*Signed=*/false); case Instruction::ZExt: - if (!selectCast(I, ISD::ZERO_EXTEND)) - return selectIntExt(I); - return true; case Instruction::SExt: - if (!selectCast(I, ISD::SIGN_EXTEND)) - return selectIntExt(I); - return true; + return selectIntExt(I); case Instruction::Trunc: if (!selectCast(I, ISD::TRUNCATE)) return selectTrunc(I); @@ -4235,6 +4974,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { return selectRet(I); case Instruction::FRem: return selectFRem(I); + case Instruction::GetElementPtr: + return selectGetElementPtr(I); } // fall-back to target-independent instruction selection.