[PowerPC] Fix large code model with the ELFv2 ABI
[oota-llvm.git] / lib / Target / PowerPC / PPCAsmPrinter.cpp
index b42b0f9ef478d767c8008f7e19c1cea8e0d2ed83..ec354c209ca0425e22fddd21802a591c20c211f5 100644 (file)
@@ -65,19 +65,20 @@ using namespace llvm;
 #define DEBUG_TYPE "asmprinter"
 
 namespace {
-  class PPCAsmPrinter : public AsmPrinter {
-  protected:
-    MapVector<MCSymbol*, MCSymbol*> TOC;
-    const PPCSubtarget *Subtarget;
-    StackMaps SM;
-  public:
-    explicit PPCAsmPrinter(TargetMachine &TM,
-                           std::unique_ptr<MCStreamer> Streamer)
-        : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
-
-    const char *getPassName() const override {
-      return "PowerPC Assembly Printer";
-    }
+class PPCAsmPrinter : public AsmPrinter {
+protected:
+  MapVector<MCSymbol *, MCSymbol *> TOC;
+  const PPCSubtarget *Subtarget;
+  StackMaps SM;
+
+public:
+  explicit PPCAsmPrinter(TargetMachine &TM,
+                         std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
+
+  const char *getPassName() const override {
+    return "PowerPC Assembly Printer";
+  }
 
     MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
 
@@ -94,10 +95,8 @@ namespace {
 
     void EmitEndOfAsmFile(Module &M) override;
 
-    void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
-                       const MachineInstr &MI);
-    void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
-                         const MachineInstr &MI);
+    void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+    void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
     void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
     bool runOnMachineFunction(MachineFunction &MF) override {
       Subtarget = &MF.getSubtarget<PPCSubtarget>();
@@ -157,15 +156,15 @@ static const char *stripRegisterPrefix(const char *RegName) {
       return RegName + 1;
     case 'c': if (RegName[1] == 'r') return RegName + 2;
   }
-  
+
   return RegName;
 }
 
 void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                  raw_ostream &O) {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout &DL = getDataLayout();
   const MachineOperand &MO = MI->getOperand(OpNo);
-  
+
   switch (MO.getType()) {
   case MachineOperand::MO_Register: {
     const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
@@ -184,8 +183,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     MO.getMBB()->getSymbol()->print(O, MAI);
     return;
   case MachineOperand::MO_ConstantPoolIndex:
-    O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
-      << '_' << MO.getIndex();
+    O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+      << MO.getIndex();
     return;
   case MachineOperand::MO_BlockAddress:
     GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
@@ -197,22 +196,22 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 
     // External or weakly linked global variables need non-lazily-resolved stubs
     if (TM.getRelocationModel() != Reloc::Static &&
-        (GV->isDeclaration() || GV->isWeakForLinker())) {
+        !GV->isStrongDefinitionForLinker()) {
       if (!GV->hasHiddenVisibility()) {
         SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-        MachineModuleInfoImpl::StubValueTy &StubSym = 
-          MMI->getObjFileInfo<MachineModuleInfoMachO>()
-            .getGVStubEntry(SymToPrint);
+        MachineModuleInfoImpl::StubValueTy &StubSym =
+            MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+                SymToPrint);
         if (!StubSym.getPointer())
           StubSym = MachineModuleInfoImpl::
             StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
       } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
                  GV->hasAvailableExternallyLinkage()) {
         SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-        
-        MachineModuleInfoImpl::StubValueTy &StubSym = 
-          MMI->getObjFileInfo<MachineModuleInfoMachO>().
-                    getHiddenGVStubEntry(SymToPrint);
+
+        MachineModuleInfoImpl::StubValueTy &StubSym =
+            MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(
+                SymToPrint);
         if (!StubSym.getPointer())
           StubSym = MachineModuleInfoImpl::
             StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
@@ -295,16 +294,16 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
       }
     case 'U': // Print 'u' for update form.
     case 'X': // Print 'x' for indexed form.
-      {
-       // FIXME: Currently for PowerPC memory operands are always loaded
-       // into a register, so we never get an update or indexed form.
-       // This is bad even for offset forms, since even if we know we
-       // have a value in -16(r1), we will generate a load into r<n>
-       // and then load from 0(r<n>).  Until that issue is fixed,
-       // tolerate 'U' and 'X' but don't output anything.
-       assert(MI->getOperand(OpNo).isReg());
-       return false;
-      }
+    {
+      // FIXME: Currently for PowerPC memory operands are always loaded
+      // into a register, so we never get an update or indexed form.
+      // This is bad even for offset forms, since even if we know we
+      // have a value in -16(r1), we will generate a load into r<n>
+      // and then load from 0(r<n>).  Until that issue is fixed,
+      // tolerate 'U' and 'X' but don't output anything.
+      assert(MI->getOperand(OpNo).isReg());
+      return false;
+    }
     }
   }
 
@@ -315,7 +314,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
   return false;
 }
 
-
 /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
 /// exists for it.  If not, create one.  Then return a symbol that references
 /// the TOC entry.
@@ -330,8 +328,7 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
   SM.serializeToStackMapSection();
 }
 
-void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
-                                  const MachineInstr &MI) {
+void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
   unsigned NumNOPBytes = MI.getOperand(1).getImm();
 
   SM.recordStackMap(MI);
@@ -353,44 +350,101 @@ void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
 
   // Emit nops.
   for (unsigned i = 0; i < NumNOPBytes; i += 4)
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
 }
 
 // Lower a patchpoint of the form:
 // [<def>], <id>, <numBytes>, <target>, <numArgs>
-void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
-                                    const MachineInstr &MI) {
+// The call sequence is padded with NOPs up to <numBytes>, so every
+// instruction emitted below must be counted in EncodedBytes.
+void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
   SM.recordPatchPoint(MI);
   PatchPointOpers Opers(&MI);
 
-  int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
   unsigned EncodedBytes = 0;
-  if (CallTarget) {
-    assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
-           "High 16 bits of call target should be zero.");
-    unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
-    EncodedBytes = 6*4;
-    // Materialize the jump address:
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
-                                    .addReg(ScratchReg)
-                                    .addImm((CallTarget >> 32) & 0xFFFF));
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
-                                    .addReg(ScratchReg)
-                                    .addReg(ScratchReg)
-                                    .addImm(32).addImm(16));
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
-                                    .addReg(ScratchReg)
-                                    .addReg(ScratchReg)
-                                    .addImm((CallTarget >> 16) & 0xFFFF));
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
-                                    .addReg(ScratchReg)
-                                    .addReg(ScratchReg)
-                                    .addImm(CallTarget & 0xFFFF));
-
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+  const MachineOperand &CalleeMO =
+    Opers.getMetaOper(PatchPointOpers::TargetPos);
+
+  if (CalleeMO.isImm()) {
+    int64_t CallTarget = CalleeMO.getImm();
+    if (CallTarget) {
+      assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
+             "High 16 bits of call target should be zero.");
+      unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+      EncodedBytes = 0;
+      // Materialize the jump address:
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
+                                      .addReg(ScratchReg)
+                                      .addImm((CallTarget >> 32) & 0xFFFF));
+      ++EncodedBytes;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::RLDIC)
+                                      .addReg(ScratchReg)
+                                      .addReg(ScratchReg)
+                                      .addImm(32).addImm(16));
+      ++EncodedBytes;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORIS8)
+                                      .addReg(ScratchReg)
+                                      .addReg(ScratchReg)
+                                      .addImm((CallTarget >> 16) & 0xFFFF));
+      ++EncodedBytes;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORI8)
+                                      .addReg(ScratchReg)
+                                      .addReg(ScratchReg)
+                                      .addImm(CallTarget & 0xFFFF));
+      ++EncodedBytes;
+
+      // Save the current TOC pointer before the remote call.
+      int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
+                                      .addReg(PPC::X2)
+                                      .addImm(TOCSaveOffset)
+                                      .addReg(PPC::X1));
+      ++EncodedBytes;
+
+      // If we're on ELFv1, then we need to load the actual function pointer
+      // from the function descriptor.
+      if (!Subtarget->isELFv2ABI()) {
+        // Load the new TOC pointer and the function address, but not r11
+        // (needing this is rare, and loading it here would prevent passing it
+        // via a 'nest' parameter).
+        EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+                                        .addReg(PPC::X2)
+                                        .addImm(8)
+                                        .addReg(ScratchReg));
+        ++EncodedBytes;
+        EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+                                        .addReg(ScratchReg)
+                                        .addImm(0)
+                                        .addReg(ScratchReg));
+        ++EncodedBytes;
+      }
+
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR8)
+                                      .addReg(ScratchReg));
+      ++EncodedBytes;
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTRL8));
+      ++EncodedBytes;
+
+      // Restore the TOC pointer after the call.
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+                                      .addReg(PPC::X2)
+                                      .addImm(TOCSaveOffset)
+                                      .addReg(PPC::X1));
+      ++EncodedBytes;
+    }
+  } else if (CalleeMO.isGlobal()) {
+    const GlobalValue *GValue = CalleeMO.getGlobal();
+    MCSymbol *MOSymbol = getSymbol(GValue);
+    const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext);
+
+    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL8_NOP)
+                                    .addExpr(SymVar));
+    EncodedBytes += 2;
   }
 
+  // Each instruction is 4 bytes.
+  EncodedBytes *= 4;
+
   // Emit padding.
   unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
   assert(NumBytes >= EncodedBytes &&
@@ -398,7 +449,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
   assert((NumBytes - EncodedBytes) % 4 == 0 &&
          "Invalid number of NOP bytes requested!");
   for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
 }
 
 /// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
@@ -440,19 +491,19 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
 void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   MCInst TmpInst;
   bool isPPC64 = Subtarget->isPPC64();
-  bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+  bool isDarwin = TM.getTargetTriple().isOSDarwin();
   const Module *M = MF->getFunction()->getParent();
   PICLevel::Level PL = M->getPICLevel();
-  
+
   // Lower multi-instruction pseudo operations.
   switch (MI->getOpcode()) {
   default: break;
   case TargetOpcode::DBG_VALUE:
     llvm_unreachable("Should be handled target independently");
   case TargetOpcode::STACKMAP:
-    return LowerSTACKMAP(*OutStreamer, SM, *MI);
+    return LowerSTACKMAP(SM, *MI);
   case TargetOpcode::PATCHPOINT:
-    return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+    return LowerPATCHPOINT(SM, *MI);
 
   case PPC::MoveGOTtoLR: {
     // Transform %LR = MoveGOTtoLR
@@ -477,17 +528,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::MovePCtoLR:
   case PPC::MovePCtoLR8: {
     // Transform %LR = MovePCtoLR
-    // Into this, where the label is the PIC base: 
+    // Into this, where the label is the PIC base:
     //     bl L1$pb
     // L1$pb:
     MCSymbol *PICBase = MF->getPICBaseSymbol();
-    
+
     // Emit the 'bl'.
-    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
-      // FIXME: We would like an efficient form for this, so we don't have to do
-      // a lot of extra uniquing.
-      .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
-    
+    EmitToStreamer(*OutStreamer,
+                   MCInstBuilder(PPC::BL)
+                       // FIXME: We would like an efficient form for this, so we
+                       // don't have to do a lot of extra uniquing.
+                       .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+
     // Emit the label.
     OutStreamer->EmitLabel(PICBase);
     return;
@@ -598,7 +650,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
-      
+
   case PPC::ADDIStocHA: {
     // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
@@ -613,28 +665,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
             MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA!");
     MCSymbol *MOSymbol = nullptr;
-    bool IsExternal = false;
-    bool IsNonLocalFunction = false;
-    bool IsCommon = false;
-    bool IsAvailExt = false;
+    bool GlobalToc = false;
 
     if (MO.isGlobal()) {
       const GlobalValue *GV = MO.getGlobal();
       MOSymbol = getSymbol(GV);
-      IsExternal = GV->isDeclaration();
-      IsCommon = GV->hasCommonLinkage();
-      IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
-        (GV->isDeclaration() || GV->isWeakForLinker());
-      IsAvailExt = GV->hasAvailableExternallyLinkage();
-    } else if (MO.isCPI())
+      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+      GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
+    } else if (MO.isCPI()) {
       MOSymbol = GetCPISymbol(MO.getIndex());
-    else if (MO.isJTI())
+    } else if (MO.isJTI()) {
       MOSymbol = GetJTISymbol(MO.getIndex());
-    else if (MO.isBlockAddress())
+    } else if (MO.isBlockAddress()) {
       MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+    }
 
-    if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
-        MO.isJTI() || MO.isBlockAddress() ||
+    if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
         TM.getCodeModel() == CodeModel::Large)
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
 
@@ -671,13 +717,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
         MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
     }
     else if (MO.isGlobal()) {
-      const GlobalValue *GValue = MO.getGlobal();
-      MOSymbol = getSymbol(GValue);
-      if (GValue->getType()->getElementType()->isFunctionTy() ||
-          GValue->isDeclaration() || GValue->hasCommonLinkage() ||
-          GValue->hasAvailableExternallyLinkage() ||
-          TM.getCodeModel() == CodeModel::Large)
-        MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+      const GlobalValue *GV = MO.getGlobal();
+      MOSymbol = getSymbol(GV);
+      DEBUG(
+        unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+        assert((GVFlags & PPCII::MO_NLP_FLAG) &&
+               "LDtocL used on symbol that could be accessed directly is "
+               "invalid. Must match ADDIStocHA."));
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
     }
 
     const MCExpr *Exp =
@@ -698,21 +745,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MachineOperand &MO = MI->getOperand(2);
     assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
     MCSymbol *MOSymbol = nullptr;
-    bool IsExternal = false;
-    bool IsNonLocalFunction = false;
 
     if (MO.isGlobal()) {
       const GlobalValue *GV = MO.getGlobal();
+      DEBUG(
+        unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+        assert (
+            !(GVFlags & PPCII::MO_NLP_FLAG) &&
+            "Interposable definitions must use indirect access."));
       MOSymbol = getSymbol(GV);
-      IsExternal = GV->isDeclaration();
-      IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
-        (GV->isDeclaration() || GV->isWeakForLinker());
-    } else if (MO.isCPI())
+    } else if (MO.isCPI()) {
       MOSymbol = GetCPISymbol(MO.getIndex());
-
-    if (IsNonLocalFunction || IsExternal ||
-        TM.getCodeModel() == CodeModel::Large)
-      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+    }
 
     const MCExpr *Exp =
       MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
@@ -784,13 +828,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
   case PPC::PPC32GOT: {
-    MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
-    const MCExpr *SymGotTlsL =
-      MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
-                              OutContext);
-    const MCExpr *SymGotTlsHA =                               
-      MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
-                              OutContext);
+    MCSymbol *GOTSymbol =
+        OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+    const MCExpr *SymGotTlsL = MCSymbolRefExpr::create(
+        GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext);
+    const MCExpr *SymGotTlsHA = MCSymbolRefExpr::create(
+        GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext);
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
                                  .addReg(MI->getOperand(0).getReg())
                                  .addExpr(SymGotTlsL));
@@ -891,11 +934,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
   case PPC::ADDISdtprelHA:
-    // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
-    // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha
+    // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %Xs, sym@dtprel@ha
   case PPC::ADDISdtprelHA32: {
-    // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
-    // Into:      %Rd = ADDIS %R3, sym@dtprel@ha
+    // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDIS %Rs, sym@dtprel@ha
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
@@ -906,7 +949,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
         *OutStreamer,
         MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
             .addReg(MI->getOperand(0).getReg())
-            .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3)
+            .addReg(MI->getOperand(1).getReg())
             .addExpr(SymDtprel));
     return;
   }
@@ -1023,14 +1066,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
 
 void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
   // linux/ppc32 - Normal entry label.
-  if (!Subtarget->isPPC64() && 
-      (TM.getRelocationModel() != Reloc::PIC_ || 
+  if (!Subtarget->isPPC64() &&
+      (TM.getRelocationModel() != Reloc::PIC_ ||
        MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
     return AsmPrinter::EmitFunctionEntryLabel();
 
   if (!Subtarget->isPPC64()) {
     const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
-       if (PPCFI->usesPICBase()) {
+    if (PPCFI->usesPICBase()) {
       MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
       MCSymbol *PICBase = MF->getPICBaseSymbol();
       OutStreamer->EmitLabel(RelocSymbol);
@@ -1049,8 +1092,28 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
   }
 
   // ELFv2 ABI - Normal entry label.
-  if (Subtarget->isELFv2ABI())
+  if (Subtarget->isELFv2ABI()) {
+    // In the Large code model, we allow arbitrary displacements between
+    // the text section and its associated TOC section.  We place the
+    // full 8-byte offset to the TOC in memory immediately preceding
+    // the function global entry point.
+    if (TM.getCodeModel() == CodeModel::Large
+        && !MF->getRegInfo().use_empty(PPC::X2)) {
+      const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+
+      MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+      MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol();
+      const MCExpr *TOCDeltaExpr =
+        MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+                                MCSymbolRefExpr::create(GlobalEPSymbol,
+                                                        OutContext),
+                                OutContext);
+
+      OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol());
+      OutStreamer->EmitValue(TOCDeltaExpr, 8);
+    }
     return AsmPrinter::EmitFunctionEntryLabel();
+  }
 
   // Emit an official procedure descriptor.
   MCSectionSubPair Current = OutStreamer->getCurrentSection();
@@ -1074,11 +1137,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
   OutStreamer->SwitchSection(Current.first, Current.second);
 }
 
-
 bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
-  const DataLayout *TD = TM.getDataLayout();
+  const DataLayout &DL = getDataLayout();
 
-  bool isPPC64 = TD->getPointerSizeInBits() == 64;
+  bool isPPC64 = DL.getPointerSizeInBits() == 64;
 
   PPCTargetStreamer &TS =
       static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
@@ -1118,10 +1180,25 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
   // thus emit a prefix sequence along the following lines:
   //
   // func:
+  // .Lfunc_gepNN:
+  //         # global entry point
+  //         addis r2,r12,(.TOC.-.Lfunc_gepNN)@ha
+  //         addi  r2,r2,(.TOC.-.Lfunc_gepNN)@l
+  // .Lfunc_lepNN:
+  //         .localentry func, .Lfunc_lepNN-.Lfunc_gepNN
+  //         # local entry point, followed by function body
+  //
+  // For the Large code model, we create
+  //
+  // .Lfunc_tocNN:
+  //         .quad .TOC.-.Lfunc_gepNN      # done by EmitFunctionEntryLabel
+  // func:
+  // .Lfunc_gepNN:
   //         # global entry point
-  //         addis r2,r12,(.TOC.-func)@ha
-  //         addi  r2,r2,(.TOC.-func)@l
-  //         .localentry func, .-func
+  //         ld    r2,.Lfunc_tocNN-.Lfunc_gepNN(r12)
+  //         add   r2,r2,r12
+  // .Lfunc_lepNN:
+  //         .localentry func, .Lfunc_lepNN-.Lfunc_gepNN
   //         # local entry point, followed by function body
   //
   // This ensures we have r2 set up correctly while executing the function
@@ -1129,32 +1206,49 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
   if (Subtarget->isELFv2ABI()
       // Only do all that if the function uses r2 in the first place.
       && !MF->getRegInfo().use_empty(PPC::X2)) {
+    const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
 
-    MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
+    MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
     OutStreamer->EmitLabel(GlobalEntryLabel);
     const MCSymbolRefExpr *GlobalEntryLabelExp =
       MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
 
-    MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
-    const MCExpr *TOCDeltaExpr =
-      MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
-                              GlobalEntryLabelExp, OutContext);
+    if (TM.getCodeModel() != CodeModel::Large) {
+      MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+      const MCExpr *TOCDeltaExpr =
+        MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+                                GlobalEntryLabelExp, OutContext);
 
-    const MCExpr *TOCDeltaHi =
-      PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
-    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
-                                 .addReg(PPC::X2)
-                                 .addReg(PPC::X12)
-                                 .addExpr(TOCDeltaHi));
-
-    const MCExpr *TOCDeltaLo =
-      PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
-    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
-                                 .addReg(PPC::X2)
-                                 .addReg(PPC::X2)
-                                 .addExpr(TOCDeltaLo));
-
-    MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
+      const MCExpr *TOCDeltaHi =
+        PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+                                   .addReg(PPC::X2)
+                                   .addReg(PPC::X12)
+                                   .addExpr(TOCDeltaHi));
+
+      const MCExpr *TOCDeltaLo =
+        PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
+                                   .addReg(PPC::X2)
+                                   .addReg(PPC::X2)
+                                   .addExpr(TOCDeltaLo));
+    } else {
+      MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol();
+      const MCExpr *TOCOffsetDeltaExpr =
+        MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext),
+                                GlobalEntryLabelExp, OutContext);
+
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+                                   .addReg(PPC::X2)
+                                   .addExpr(TOCOffsetDeltaExpr)
+                                   .addReg(PPC::X12));
+      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD8)
+                                   .addReg(PPC::X2)
+                                   .addReg(PPC::X2)
+                                   .addReg(PPC::X12));
+    }
+
+    MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol();
     OutStreamer->EmitLabel(LocalEntryLabel);
     const MCSymbolRefExpr *LocalEntryLabelExp =
        MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
@@ -1237,8 +1331,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
 
   // Prime text sections so they are adjacent.  This reduces the likelihood a
   // large data or debug section causes a branch to exceed 16M limit.
-  const TargetLoweringObjectFileMachO &TLOFMacho = 
-    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+  const TargetLoweringObjectFileMachO &TLOFMacho =
+      static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
   OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
   if (TM.getRelocationModel() == Reloc::PIC_) {
     OutStreamer->SwitchSection(
@@ -1269,20 +1363,21 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
 
 void PPCDarwinAsmPrinter::
 EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
-  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+  bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
 
   // Construct a local MCSubtargetInfo and shadow EmitToStreamer here.
   // This is because the MachineFunction won't exist (but have not yet been
   // freed) and since we're at the global level we can use the default
   // constructed subtarget.
   std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
-      TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+      TM.getTargetTriple().str(), TM.getTargetCPU(),
+      TM.getTargetFeatureString()));
   auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) {
     S.EmitInstruction(Inst, *STI);
   };
 
-  const TargetLoweringObjectFileMachO &TLOFMacho = 
-    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+  const TargetLoweringObjectFileMachO &TLOFMacho =
+      static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
 
   // .lazy_symbol_pointer
   MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
@@ -1296,12 +1391,12 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       OutStreamer->SwitchSection(StubSection);
       EmitAlignment(4);
-      
+
       MCSymbol *Stub = Stubs[i].first;
       MCSymbol *RawSym = Stubs[i].second.getPointer();
       MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
       MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
-                                           
+
       OutStreamer->EmitLabel(Stub);
       OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
 
@@ -1406,20 +1501,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
       OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
     }
   }
-  
+
   OutStreamer->AddBlankLine();
 }
 
-
 bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
-  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+  bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
 
   // Darwin/PPC always uses mach-o.
-  const TargetLoweringObjectFileMachO &TLOFMacho = 
-    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+  const TargetLoweringObjectFileMachO &TLOFMacho =
+      static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
   MachineModuleInfoMachO &MMIMacho =
-    MMI->getObjFileInfo<MachineModuleInfoMachO>();
-  
+      MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
   MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
   if (!Stubs.empty())
     EmitFunctionStubs(Stubs);
@@ -1427,27 +1521,27 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (MAI->doesSupportExceptionHandling() && MMI) {
     // Add the (possibly multiple) personalities to the set of global values.
     // Only referenced functions get into the Personalities list.
-    const std::vector<const Function*> &Personalities = MMI->getPersonalities();
-    for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
-         E = Personalities.end(); I != E; ++I) {
-      if (*I) {
-        MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+    for (const Function *Personality : MMI->getPersonalities()) {
+      if (Personality) {
+        MCSymbol *NLPSym =
+            getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
         MachineModuleInfoImpl::StubValueTy &StubSym =
-          MMIMacho.getGVStubEntry(NLPSym);
-        StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
+            MMIMacho.getGVStubEntry(NLPSym);
+        StubSym =
+            MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
       }
     }
   }
 
   // Output stubs for dynamically-linked functions.
   Stubs = MMIMacho.GetGVStubList();
-  
+
   // Output macho stubs for external and common global variables.
   if (!Stubs.empty()) {
     // Switch with ".non_lazy_symbol_pointer" directive.
     OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
     EmitAlignment(isPPC64 ? 3 : 2);
-    
+
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
       OutStreamer->EmitLabel(Stubs[i].first);
@@ -1478,7 +1572,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (!Stubs.empty()) {
     OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
     EmitAlignment(isPPC64 ? 3 : 2);
-    
+
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
       OutStreamer->EmitLabel(Stubs[i].first);
@@ -1510,13 +1604,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
 static AsmPrinter *
 createPPCAsmPrinterPass(TargetMachine &tm,
                         std::unique_ptr<MCStreamer> &&Streamer) {
-  if (Triple(tm.getTargetTriple()).isMacOSX())
+  if (tm.getTargetTriple().isMacOSX())
     return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
   return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() { 
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
   TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
   TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
   TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);