X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCFastISel.cpp;h=188c8e84c64d6a159bc7a55a875f1c2c12352222;hp=07e3b64f7a837997226eeafe5dd7e07b6dea69c4;hb=18a86c95fc36b5f622e8dc87f71252de37a1ed44;hpb=1836fe5651722c638e32ac94c5b4fe885afa9f15 diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 07e3b64f7a8..188c8e84c64 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,10 +13,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppcfastisel" #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" +#include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/Optional.h" @@ -28,19 +29,19 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" //===----------------------------------------------------------------------===// // // TBD: -// FastLowerArguments: Handle simple cases. +// fastLowerArguments: Handle simple cases. // PPCMaterializeGV: Handle TLS. // SelectCall: Handle function pointers. // SelectCall: Handle multi-register return values. @@ -58,6 +59,8 @@ //===----------------------------------------------------------------------===// using namespace llvm; +#define DEBUG_TYPE "ppcfastisel" + namespace { typedef struct Address { @@ -80,46 +83,47 @@ typedef struct Address { } } Address; -class PPCFastISel : public FastISel { +class PPCFastISel final : public FastISel { const TargetMachine &TM; + const PPCSubtarget *PPCSubTarget; + PPCFunctionInfo *PPCFuncInfo; const TargetInstrInfo &TII; const TargetLowering &TLI; - const PPCSubtarget &PPCSubTarget; LLVMContext *Context; public: explicit PPCFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) - : FastISel(FuncInfo, LibInfo), - TM(FuncInfo.MF->getTarget()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()), - PPCSubTarget( - *((static_cast(&TM))->getSubtargetImpl()) - ), - Context(&FuncInfo.Fn->getContext()) { } + : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()), + PPCSubTarget(&FuncInfo.MF->getSubtarget()), + PPCFuncInfo(FuncInfo.MF->getInfo()), + TII(*PPCSubTarget->getInstrInfo()), + TLI(*PPCSubTarget->getTargetLowering()), + Context(&FuncInfo.Fn->getContext()) {} // Backend specific FastISel code. private: - virtual bool TargetSelectInstruction(const Instruction *I); - virtual unsigned TargetMaterializeConstant(const Constant *C); - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); - virtual bool FastLowerArguments(); - virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); + bool fastSelectInstruction(const Instruction *I) override; + unsigned fastMaterializeConstant(const Constant *C) override; + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; + bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) override; + bool fastLowerArguments() override; + unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; + unsigned fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + + bool fastLowerCall(CallLoweringInfo &CLI) override; // Instruction selection routines. private: @@ -127,13 +131,11 @@ class PPCFastISel : public FastISel { bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); - bool SelectCmp(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); bool SelectIToFP(const Instruction *I, bool IsSigned); bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -142,6 +144,13 @@ class PPCFastISel : public FastISel { private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); + bool isValueAvailable(const Value *V) const; + bool isVSFRCRegister(unsigned Register) const { + return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID; + } + bool isVSSRCRegister(unsigned Register) const { + return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID; + } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg); bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, @@ -155,7 +164,8 @@ class PPCFastISel : public FastISel { unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); - unsigned PPCMaterializeInt(const Constant *C, MVT VT); + unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt = true); unsigned PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, @@ -174,9 +184,7 @@ class PPCFastISel : public FastISel { CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg); - void finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg); + bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); CCAssignFn *usePPC32CCs(unsigned Flag); private: @@ -255,7 +263,7 @@ static Optional getComparePred(CmpInst::Predicate Pred) { // fast-isel, and return its equivalent machine type in VT. // FIXME: Copied directly from ARM -- factor into base class? bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT Evt = TLI.getValueType(Ty, true); + EVT Evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (Evt == MVT::Other || !Evt.isSimple()) return false; @@ -280,10 +288,18 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { return false; } +bool PPCFastISel::isValueAvailable(const Value *V) const { + if (!isa(V)) + return true; + + const auto *I = cast(V); + return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; +} + // Given a value Obj, create an Address object Addr that represents its // address. Return false if we can't handle it. bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { - const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -306,12 +322,13 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { return PPCComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -485,6 +502,19 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // the indexed form. Also handle stack pointers with special needs. unsigned IndexReg = 0; PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + + // If this is a potential VSX load with an offset of 0, a VSX indexed load can + // be used. + bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg); + bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg); + bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; + bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD; + if ((Is32VSXLoad || Is64VSXLoad) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + (Addr.Offset == 0)) { + UseOffset = false; + } + if (ResultReg == 0) ResultReg = createResultReg(UseRC); @@ -492,10 +522,12 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // in range, as otherwise PPCSimplifyAddress would have converted it // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { + // VSX only provides an indexed load. + if (Is32VSXLoad || Is64VSXLoad) return false; - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); @@ -504,6 +536,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // Base reg with offset in range. } else if (UseOffset) { + // VSX only provides an indexed load. + if (Is32VSXLoad || Is64VSXLoad) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); @@ -526,8 +560,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LWA: Opc = PPC::LWAX; break; case PPC::LWA_32: Opc = PPC::LWAX_32; break; case PPC::LD: Opc = PPC::LDX; break; - case PPC::LFS: Opc = PPC::LFSX; break; - case PPC::LFD: Opc = PPC::LFDX; break; + case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; + case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Addr.Base.Reg).addReg(IndexReg); @@ -557,12 +591,12 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { // to constrain RA from using R0/X0 when this is not legal. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -605,13 +639,28 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { unsigned IndexReg = 0; PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + // If this is a potential VSX store with an offset of 0, a VSX indexed store + // can be used. + bool IsVSSRC = isVSSRCRegister(SrcReg); + bool IsVSFRC = isVSFRCRegister(SrcReg); + bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS; + bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD; + if ((Is32VSXStore || Is64VSXStore) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + (Addr.Offset == 0)) { + UseOffset = false; + } + // Note: If we still have a frame index here, we know the offset is // in range, as otherwise PPCSimplifyAddress would have converted it // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + // VSX only provides an indexed store. + if (Is32VSXStore || Is64VSXStore) return false; + + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); @@ -622,12 +671,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { .addMemOperand(MMO); // Base reg with offset in range. - } else if (UseOffset) + } else if (UseOffset) { + // VSX only provides an indexed store. + if (Is32VSXStore || Is64VSXStore) return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. - else { + } else { // Get the RR opcode corresponding to the RI one. FIXME: It would be // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it // is hard to get at. @@ -640,11 +692,21 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STH8: Opc = PPC::STHX8; break; case PPC::STW8: Opc = PPC::STWX8; break; case PPC::STD: Opc = PPC::STDX; break; - case PPC::STFS: Opc = PPC::STFSX; break; - case PPC::STFD: Opc = PPC::STFDX; break; + case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break; + case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) - .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg); + + // If we have an index register defined we use it in the store inst, + // otherwise we use X0 as base as it makes the vector instructions to + // use zero in the computation of the effective address regardless the + // content of the register. + if (IndexReg) + MIB.addReg(Addr.Base.Reg).addReg(IndexReg); + else + MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); } return true; @@ -689,35 +751,35 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { // For now, just try the simplest case where it's fed by a compare. if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - Optional OptPPCPred = getComparePred(CI->getPredicate()); - if (!OptPPCPred) - return false; + if (isValueAvailable(CI)) { + Optional OptPPCPred = getComparePred(CI->getPredicate()); + if (!OptPPCPred) + return false; - PPC::Predicate PPCPred = OptPPCPred.getValue(); + PPC::Predicate PPCPred = OptPPCPred.getValue(); - // Take advantage of fall-through opportunities. - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - PPCPred = PPC::InvertPredicate(PPCPred); - } - - unsigned CondReg = createResultReg(&PPC::CRRCRegClass); + // Take advantage of fall-through opportunities. + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + PPCPred = PPC::InvertPredicate(PPCPred); + } - if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CondReg)) - return false; + unsigned CondReg = createResultReg(&PPC::CRRCRegClass); - BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) - .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - FastEmitBranch(FBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TBB); - return true; + if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CondReg)) + return false; + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) + .addImm(PPCPred).addReg(CondReg).addMBB(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } else if (const ConstantInt *CI = dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DbgLoc); + fastEmitBranch(Target, DbgLoc); return true; } @@ -735,11 +797,14 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, bool IsZExt, unsigned DestReg) { Type *Ty = SrcValue1->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); + if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits()) + return false; + // See if operand 2 is an immediate encodeable in the compare. // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. We are @@ -826,8 +891,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, // Attempt to fast-select a floating-point extend instruction. bool PPCFastISel::SelectFPExt(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; @@ -837,15 +902,15 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) { return false; // No code is generated for a FP extend. - UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } // Attempt to fast-select a floating-point truncate instruction. bool PPCFastISel::SelectFPTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; @@ -859,12 +924,12 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) .addReg(SrcReg); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } // Move an i32 or i64 value in a GPR to an f64 value in an FPR. -// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. // FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte @@ -895,11 +960,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, unsigned LoadOpc = PPC::LFD; if (SrcVT == MVT::i32) { - Addr.Offset = 4; - if (!IsSigned) + if (!IsSigned) { LoadOpc = PPC::LFIWZX; - else if (PPCSubTarget.hasLFIWAX()) + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; + } else if (PPCSubTarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; + } } const TargetRegisterClass *RC = &PPC::F8RCRegClass; @@ -911,6 +978,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } // Attempt to fast-select an integer-to-floating-point conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { MVT DstVT; Type *DstTy = I->getType(); @@ -921,7 +990,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; @@ -937,7 +1006,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // We can only lower an unsigned convert if we have the newer // floating-point conversion operations. - if (!IsSigned && !PPCSubTarget.hasFPCVT()) + if (!IsSigned && !PPCSubTarget->hasFPCVT()) return false; // FIXME: For now we require the newer floating-point conversion operations @@ -945,7 +1014,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // to single-precision float. Otherwise we have to generate a lot of // fiddly code to avoid double rounding. If necessary, the fiddly code // can be found in PPCTargetLowering::LowerINT_TO_FP(). - if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT()) return false; // Extend the input if necessary. @@ -976,13 +1045,13 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(FPReg); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } // Move the floating-point value in SrcReg into an integer destination // register, and return the register (or zero if we can't handle it). -// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, @@ -1008,7 +1077,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, // to determine the required register class. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) @@ -1018,6 +1087,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, } // Attempt to fast-select a floating-point-to-integer conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { MVT DstVT, SrcVT; Type *DstTy = I->getType(); @@ -1027,6 +1098,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (DstVT != MVT::i32 && DstVT != MVT::i64) return false; + // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. + if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) + return false; + Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) @@ -1060,7 +1135,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (IsSigned) Opc = PPC::FCTIWZ; else - Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; else Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; @@ -1073,14 +1148,14 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (IntReg == 0) return false; - UpdateValueMap(I, IntReg); + updateValueMap(I, IntReg); return true; } // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1162,7 +1237,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { ResultReg) .addReg(SrcReg1) .addImm(Imm); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } } @@ -1178,7 +1253,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1193,7 +1268,12 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, unsigned &NumBytes, bool IsVarArg) { SmallVector ArgLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context); + + // Reserve space for the linkage area on the stack. + unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize(); + CCInfo.AllocateStack(LinkageSize, 8); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); // Bail out if we can't handle any of the arguments. @@ -1203,7 +1283,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, // Skip vector arguments for now, as well as long double and // uint128_t, and anything that isn't passed in a register. - if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || !VA.isRegLoc() || VA.needsCustom()) return false; @@ -1215,6 +1295,14 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, // Get a count of how many bytes are to be pushed onto the stack. NumBytes = CCInfo.getNextStackOffset(); + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. + NumBytes = std::max(NumBytes, LinkageSize + 64); + // Issue CALLSEQ_START. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) @@ -1222,7 +1310,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, // Prepare to assign register arguments. Every argument uses up a // GPR protocol register even if it's passed in a floating-point - // register. + // register (unless we're using the fast calling convention). unsigned NextGPR = PPC::X3; unsigned NextFPR = PPC::F1; @@ -1272,7 +1360,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, unsigned ArgReg; if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { ArgReg = NextFPR++; - ++NextGPR; + if (CC != CallingConv::Fast) + ++NextGPR; } else ArgReg = NextGPR++; @@ -1286,9 +1375,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, // For a call that we've determined we can fast-select, finish the // call sequence and generate a copy to obtain the return value (if any). -void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg) { +bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) { + CallingConv::ID CC = CLI.CallConv; + // Issue CallSEQ_END. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) @@ -1299,7 +1388,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, // any real difficulties there. if (RetVT != MVT::isVoid) { SmallVector RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); @@ -1344,52 +1433,51 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, } assert(ResultReg && "ResultReg unset!"); - UsedRegs.push_back(SourcePhysReg); - UpdateValueMap(I, ResultReg); + CLI.InRegs.push_back(SourcePhysReg); + CLI.ResultReg = ResultReg; + CLI.NumResultRegs = 1; } + + return true; } -// Attempt to fast-select a call instruction. -bool PPCFastISel::SelectCall(const Instruction *I) { - const CallInst *CI = cast(I); - const Value *Callee = CI->getCalledValue(); +bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { + CallingConv::ID CC = CLI.CallConv; + bool IsTailCall = CLI.IsTailCall; + bool IsVarArg = CLI.IsVarArg; + const Value *Callee = CLI.Callee; + const MCSymbol *Symbol = CLI.Symbol; - // Can't handle inline asm. - if (isa(Callee)) + if (!Callee && !Symbol) return false; // Allow SelectionDAG isel to handle tail calls. - if (CI->isTailCall()) + if (IsTailCall) return false; - // Obtain calling convention. - ImmutableCallSite CS(CI); - CallingConv::ID CC = CS.getCallingConv(); - - PointerType *PT = cast(CS.getCalledValue()->getType()); - FunctionType *FTy = cast(PT->getElementType()); - bool IsVarArg = FTy->isVarArg(); - - // Not ready for varargs yet. + // Let SDISel handle vararg functions. if (IsVarArg) return false; // Handle simple calls for now, with legal return types and // those that can be extended. - Type *RetTy = I->getType(); + Type *RetTy = CLI.RetTy; MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && RetVT != MVT::i8) return false; + else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits()) + // We can't handle boolean returns when CR bits are in use. + return false; // FIXME: No multi-register return values yet. if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && RetVT != MVT::f64) { SmallVector RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); if (RVLocs.size() > 1) return false; @@ -1397,7 +1485,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // Bail early if more than 8 arguments, as we only currently // handle arguments passed in registers. - unsigned NumArgs = CS.arg_size(); + unsigned NumArgs = CLI.OutVals.size(); if (NumArgs > 8) return false; @@ -1412,28 +1500,16 @@ bool PPCFastISel::SelectCall(const Instruction *I) { ArgVTs.reserve(NumArgs); ArgFlags.reserve(NumArgs); - for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); - II != IE; ++II) { - // FIXME: ARM does something for intrinsic calls here, check into that. - - unsigned AttrIdx = II - CS.arg_begin() + 1; - + for (unsigned i = 0, ie = NumArgs; i != ie; ++i) { // Only handle easy calls for now. It would be reasonably easy // to handle <= 8-byte structures passed ByVal in registers, but we // have to ensure they are right-justified in the register. - if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || - CS.paramHasAttr(AttrIdx, Attribute::StructRet) || - CS.paramHasAttr(AttrIdx, Attribute::Nest) || - CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + ISD::ArgFlagsTy Flags = CLI.OutFlags[i]; + if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal()) return false; - ISD::ArgFlagsTy Flags; - if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) - Flags.setSExt(); - if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) - Flags.setZExt(); - - Type *ArgTy = (*II)->getType(); + Value *ArgValue = CLI.OutVals[i]; + Type *ArgTy = ArgValue->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) return false; @@ -1441,14 +1517,11 @@ bool PPCFastISel::SelectCall(const Instruction *I) { if (ArgVT.isVector()) return false; - unsigned Arg = getRegForValue(*II); + unsigned Arg = getRegForValue(ArgValue); if (Arg == 0) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*II); + Args.push_back(ArgValue); ArgRegs.push_back(Arg); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); @@ -1462,35 +1535,46 @@ bool PPCFastISel::SelectCall(const Instruction *I) { RegArgs, CC, NumBytes, IsVarArg)) return false; + MachineInstrBuilder MIB; // FIXME: No handling for function pointers yet. This requires // implementing the function descriptor (OPD) setup. const GlobalValue *GV = dyn_cast(Callee); - if (!GV) - return false; - - // Build direct call with NOP for TOC restore. - // FIXME: We can and should optimize away the NOP for local calls. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(PPC::BL8_NOP)); - // Add callee. - MIB.addGlobalAddress(GV); + if (!GV) { + // patchpoints are a special case; they always dispatch to a pointer value. + // However, we don't actually want to generate the indirect call sequence + // here (that will be generated, as necessary, during asm printing), and + // the call we generate here will be erased by FastISel::selectPatchpoint, + // so don't try very hard... + if (CLI.IsPatchPoint) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP)); + else + return false; + } else { + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + } // Add implicit physical register uses to the call. for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) MIB.addReg(RegArgs[II], RegState::Implicit); + // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live + // into the call. + PPCFuncInfo->setUsesTOCBasePtr(); + MIB.addReg(PPC::X2, RegState::Implicit); + // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); - // Finish off the call including any return values. - SmallVector UsedRegs; - finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); - - // Set all unused physregs defs as dead. - static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + CLI.Call = MIB; - return true; + // Finish off the call including any return values. + return finishCall(RetVT, CLI, NumBytes); } // Attempt to fast-select a return instruction. @@ -1508,11 +1592,11 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context); CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); const Value *RV = Ret->getOperand(0); @@ -1520,15 +1604,22 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (ValLocs.size() > 1) return false; - // Special case for returning a constant integer of any size. - // Materialize the constant as an i64 and copy it to the return - // register. This avoids an unnecessary extend or truncate. - if (isa(*RV)) { - const Constant *C = cast(RV); - unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); - unsigned RetReg = ValLocs[0].getLocReg(); + // Special case for returning a constant integer of any size - materialize + // the constant as an i64 and copy it to the return register. + if (const ConstantInt *CI = dyn_cast(RV)) { + CCValAssign &VA = ValLocs[0]; + + unsigned RetReg = VA.getLocReg(); + // We still need to worry about properly extending the sign. For example, + // we could have only a single bit or a constant that needs zero + // extension rather than sign extension. Make sure we pass the return + // value extension property to integer materialization. + unsigned SrcReg = + PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); + TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); + RetRegs.push_back(RetReg); } else { @@ -1545,7 +1636,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); @@ -1591,7 +1682,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { } MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(PPC::BLR)); + TII.get(PPC::BLR8)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); @@ -1664,8 +1755,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast(I); - for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + for (const BasicBlock *SuccBB : IB->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]); return true; } @@ -1673,8 +1764,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { // Attempt to fast-select an integer truncate instruction. bool PPCFastISel::SelectTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) return false; @@ -1695,7 +1786,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) { SrcReg = ResultReg; } - UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } @@ -1710,8 +1801,8 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -1734,13 +1825,13 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) { if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // Attempt to fast-select an instruction that wasn't handled by // the table-generated machinery. -bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { +bool PPCFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Load: @@ -1770,9 +1861,7 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); case Instruction::Call: - if (dyn_cast(I)) - return false; - return SelectCall(I); + return selectCall(I); case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: @@ -1803,14 +1892,14 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); CodeModel::Model CModel = TM.getCodeModel(); - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - (VT == MVT::f32) ? 4 : 8, Align); + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align); unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + PPCFuncInfo->setUsesTOCBasePtr(); // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), @@ -1855,18 +1944,12 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // FIXME: Jump tables are not yet required because fast-isel doesn't // handle switches; if that changes, we need them as well. For now, // what follows assumes everything's a generic (or TLS) global address. - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias, use the aliasee for determining thread-locality. - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal(false)); - } // FIXME: We don't yet handle the complexity of TLS. - bool IsTLS = GVar && GVar->isThreadLocal(); - if (IsTLS) + if (GV->isThreadLocal()) return 0; + PPCFuncInfo->setUsesTOCBasePtr(); // For small code model, generate a simple TOC load. if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), @@ -1874,8 +1957,8 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { .addGlobalAddress(GV) .addReg(PPC::X2); else { - // If the address is an externally defined symbol, a symbol with - // common or externally available linkage, a function address, or a + // If the address is an externally defined symbol, a symbol with common + // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: // LDtocL(GV, ADDIStocHA(%X2, GV)) @@ -1886,18 +1969,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - // !GVar implies a function address. An external variable is one - // without an initializer. - // If/when switches are implemented, jump tables should be handled - // on the "if" path here. - if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() || - GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage()) + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); - else + } else { // Otherwise generate the ADDItocL. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); + } } return DestReg; @@ -1994,7 +2074,16 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). -unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { +unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt) { + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) @@ -2004,17 +2093,22 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { &PPC::GPRCRegClass); // If the constant is in range, use a load-immediate. - const ConstantInt *CI = cast(C); - if (isInt<16>(CI->getSExtValue())) { + if (UseSExt && isInt<16>(CI->getSExtValue())) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) - .addImm(CI->getSExtValue()); + .addImm(CI->getSExtValue()); + return ImmReg; + } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) { + unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; + unsigned ImmReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) + .addImm(CI->getZExtValue()); return ImmReg; } // Construct the constant piecewise. - int64_t Imm = CI->getZExtValue(); + int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue(); if (VT == MVT::i64) return PPCMaterialize64BitInt(Imm, RC); @@ -2026,8 +2120,8 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). -unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); +unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; @@ -2037,15 +2131,15 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { return PPCMaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) return PPCMaterializeGV(GV, VT); - else if (isa(C)) - return PPCMaterializeInt(C, VT); + else if (const ConstantInt *CI = dyn_cast(C)) + return PPCMaterializeInt(CI, VT, VT != MVT::i1); return 0; } // Materialize the address created by an alloca into a register, and // return the register number (or zero if we failed to handle it). -unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; @@ -2136,7 +2230,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned ResultReg = MI->getOperand(0).getReg(); - if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt)) + if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) return false; MI->eraseFromParent(); @@ -2145,7 +2239,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, // Attempt to lower call arguments in a faster way than done by // the selection DAG code. -bool PPCFastISel::FastLowerArguments() { +bool PPCFastISel::fastLowerArguments() { // Defer to normal argument lowering for now. It's reasonably // efficient. Consider doing something like ARM to handle the // case where all args fit in registers, no varargs, no float @@ -2155,11 +2249,20 @@ bool PPCFastISel::FastLowerArguments() { // Handle materializing integer constants into a register. This is not // automatically generated for PowerPC, so must be explicitly created here. -unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { +unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; @@ -2183,7 +2286,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { // assigning R0 or X0 to the output register for GPRC and G8RC // register classes, as any such result could be used in ADDI, etc., // where those regs have another meaning. -unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, +unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { @@ -2196,27 +2299,27 @@ unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); - return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC, + return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Op0IsKill, Imm); } // Override for instructions with one register operand to avoid use of // R0/X0. The automatic infrastructure isn't aware of the context so // we must be conservative. -unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode, +unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass* RC, unsigned Op0, bool Op0IsKill) { const TargetRegisterClass *UseRC = (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); - return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); + return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); } // Override for instructions with two register operands to avoid use // of R0/X0. The automatic infrastructure isn't aware of the context // so we must be conservative. -unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, +unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass* RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { @@ -2224,7 +2327,7 @@ unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); - return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, + return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, Op1, Op1IsKill); } @@ -2232,13 +2335,10 @@ namespace llvm { // Create the fast instruction selector for PowerPC64 ELF. FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { - const TargetMachine &TM = FuncInfo.MF->getTarget(); - // Only available on 64-bit ELF for now. - const PPCSubtarget *Subtarget = &TM.getSubtarget(); - if (Subtarget->isPPC64() && Subtarget->isSVR4ABI()) + const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget(); + if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) return new PPCFastISel(FuncInfo, LibInfo); - - return 0; + return nullptr; } }