[PowerPC] Skeletal FastISel support for 64-bit PowerPC ELF.

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Tue, 30 Jul 2013 00:50:39 +0000 (00:50 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Tue, 30 Jul 2013 00:50:39 +0000 (00:50 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Tue, 30 Jul 2013 00:50:39 +0000 (00:50 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Tue, 30 Jul 2013 00:50:39 +0000 (00:50 +0000)
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt

index e5c5204708db9b004cd931d031d07b4f4fda6baf..737b52174e25ad9cbdcf82da1f9395d6afbe6331 100644 (file)
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -7,6 +7,7 @@ tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
  tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
  tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
  tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel)
  tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv)
  tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)
  add_public_tablegen_target(PowerPCCommonTableGen)
@@ -20,6 +21,7 @@ add_llvm_target(PowerPCCodeGen
    PPCInstrInfo.cpp
    PPCISelDAGToDAG.cpp
    PPCISelLowering.cpp
+  PPCFastISel.cpp
    PPCFrameLowering.cpp
    PPCJITInfo.cpp
    PPCMCInstLower.cpp
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile

index 6666694ecc7b6439e0437ad7d4436ad63414fbf9..21fdcd9350e15e8e734577fa4a3718252c7ce36d 100644 (file)
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
                  PPCGenAsmWriter.inc  PPCGenCodeEmitter.inc \
                  PPCGenInstrInfo.inc PPCGenDAGISel.inc \
                  PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
-                PPCGenMCCodeEmitter.inc
+                PPCGenMCCodeEmitter.inc PPCGenFastISel.inc
  
  DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
  
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp

new file mode 100644 (file)

index 0000000..8cbf1fb
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -0,0 +1,328 @@
+//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// PPCGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppcfastisel"
+#include "PPC.h"
+#include "PPCISelLowering.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+typedef struct Address {
+  enum {
+    RegBase,
+    FrameIndexBase
+  } BaseType;
+
+  union {
+    unsigned Reg;
+    int FI;
+  } Base;
+
+  int Offset;
+
+  // Innocuous defaults for our address.
+  Address()
+   : BaseType(RegBase), Offset(0) {
+     Base.Reg = 0;
+   }
+} Address;
+
+class PPCFastISel : public FastISel {
+
+  const TargetMachine &TM;
+  const TargetInstrInfo &TII;
+  const TargetLowering &TLI;
+  const PPCSubtarget &PPCSubTarget;
+  LLVMContext *Context;
+
+  public:
+    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
+                         const TargetLibraryInfo *LibInfo)
+    : FastISel(FuncInfo, LibInfo),
+      TM(FuncInfo.MF->getTarget()),
+      TII(*TM.getInstrInfo()),
+      TLI(*TM.getTargetLowering()),
+      PPCSubTarget(
+       *((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl())
+      ),
+      Context(&FuncInfo.Fn->getContext()) { }
+
+  // Backend specific FastISel code.
+  private:
+    virtual bool TargetSelectInstruction(const Instruction *I);
+    virtual unsigned TargetMaterializeConstant(const Constant *C);
+    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+    virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                     const LoadInst *LI);
+    virtual bool FastLowerArguments();
+
+  // Utility routines.
+  private:
+    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+    unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+    unsigned PPCMaterialize32BitInt(int64_t Imm,
+                                    const TargetRegisterClass *RC);
+    unsigned PPCMaterialize64BitInt(int64_t Imm,
+                                    const TargetRegisterClass *RC);
+
+  private:
+  #include "PPCGenFastISel.inc"
+
+};
+
+} // end anonymous namespace
+
+// Attempt to fast-select an instruction that wasn't handled by
+// the table-generated machinery.  TBD.
+bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+  return I && false;
+}
+
+// Materialize a floating-point constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
+  // No plans to handle long double here.
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return 0;
+
+  // All FP constants are loaded from the constant pool.
+  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+  assert(Align > 0 && "Unexpectedly missing alignment information!");
+  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  CodeModel::Model CModel = TM.getCodeModel();
+
+  MachineMemOperand *MMO =
+    FuncInfo.MF->getMachineMemOperand(
+      MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+      (VT == MVT::f32) ? 4 : 8, Align);
+
+  // For small code model, generate a LDtocCPT.
+  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
+            DestReg)
+      .addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO);
+  else {
+    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
+    unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+    unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+      .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+      .addReg(TmpReg)
+      .addMemOperand(MMO);
+  }
+
+  return DestReg;
+}
+
+// Materialize a 32-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
+                                             const TargetRegisterClass *RC) {
+  unsigned Lo = Imm & 0xFFFF;
+  unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+  unsigned ResultReg = createResultReg(RC);
+  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+  if (isInt<16>(Imm))
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
+      .addImm(Imm);
+  else if (Lo) {
+    // Both Lo and Hi have nonzero bits.
+    unsigned TmpReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
+      .addImm(Hi);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
+      .addReg(TmpReg).addImm(Lo);
+  } else
+    // Just Hi bits.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
+      .addImm(Hi);
+  
+  return ResultReg;
+}
+
+// Materialize a 64-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
+                                             const TargetRegisterClass *RC) {
+  unsigned Remainder = 0;
+  unsigned Shift = 0;
+
+  // If the value doesn't fit in 32 bits, see if we can shift it
+  // so that it fits in 32 bits.
+  if (!isInt<32>(Imm)) {
+    Shift = countTrailingZeros<uint64_t>(Imm);
+    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+    if (isInt<32>(ImmSh))
+      Imm = ImmSh;
+    else {
+      Remainder = Imm;
+      Shift = 32;
+      Imm >>= 32;
+    }
+  }
+
+  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
+  // (if not shifted).
+  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
+  if (!Shift)
+    return TmpReg1;
+
+  // If upper 32 bits were not zero, we've built them and need to shift
+  // them into place.
+  unsigned TmpReg2;
+  if (Imm) {
+    TmpReg2 = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR),
+            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
+  } else
+    TmpReg2 = TmpReg1;
+
+  unsigned TmpReg3, Hi, Lo;
+  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+    TmpReg3 = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8),
+            TmpReg3).addReg(TmpReg2).addImm(Hi);
+  } else
+    TmpReg3 = TmpReg2;
+
+  if ((Lo = Remainder & 0xFFFF)) {
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8),
+            ResultReg).addReg(TmpReg3).addImm(Lo);
+    return ResultReg;
+  }
+
+  return TmpReg3;
+}
+
+
+// Materialize an integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+
+  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+      VT != MVT::i8 && VT != MVT::i1) 
+    return 0;
+
+  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+                                   &PPC::GPRCRegClass);
+
+  // If the constant is in range, use a load-immediate.
+  const ConstantInt *CI = cast<ConstantInt>(C);
+  if (isInt<16>(CI->getSExtValue())) {
+    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+    unsigned ImmReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg)
+      .addImm(CI->getSExtValue());
+    return ImmReg;
+  }
+
+  // Construct the constant piecewise.
+  int64_t Imm = CI->getZExtValue();
+
+  if (VT == MVT::i64)
+    return PPCMaterialize64BitInt(Imm, RC);
+  else if (VT == MVT::i32)
+    return PPCMaterialize32BitInt(Imm, RC);
+
+  return 0;
+}
+
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+  EVT CEVT = TLI.getValueType(C->getType(), true);
+
+  // Only handle simple types.
+  if (!CEVT.isSimple()) return 0;
+  MVT VT = CEVT.getSimpleVT();
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+    return PPCMaterializeFP(CFP, VT);
+  else if (isa<ConstantInt>(C))
+    return PPCMaterializeInt(C, VT);
+  // TBD: Global values.
+
+  return 0;
+}
+
+// Materialize the address created by an alloca into a register, and
+// return the register number (or zero if we failed to handle it).  TBD.
+unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+  return AI && 0;
+}
+
+// Fold loads into extends when possible.  TBD.
+bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                      const LoadInst *LI) {
+  return MI && OpNo && LI && false;
+}
+
+// Attempt to lower call arguments in a faster way than done by
+// the selection DAG code.
+bool PPCFastISel::FastLowerArguments() {
+  // Defer to normal argument lowering for now.  It's reasonably
+  // efficient.  Consider doing something like ARM to handle the
+  // case where all args fit in registers, no varargs, no float
+  // or vector args.
+  return false;
+}
+
+namespace llvm {
+  // Create the fast instruction selector for PowerPC64 ELF.
+  FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                const TargetLibraryInfo *LibInfo) {
+    const TargetMachine &TM = FuncInfo.MF->getTarget();
+
+    // Only available on 64-bit ELF for now.
+    const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+    if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
+      return new PPCFastISel(FuncInfo, LibInfo);
+
+    return 0;
+  }
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index abb2be67135d0bc4bb08867d1ea809eaa812caad..aadab824ba583fd9b2bb2f2a4239d74025e65267 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7885,3 +7885,9 @@ Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
    return Sched::ILP;
  }
  
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                  const TargetLibraryInfo *LibInfo) const {
+  return PPC::createFastISel(FuncInfo, LibInfo);
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 1b728a78cdab690a2099b3707fc8988c042c6d18..aa5e8219290019fdedd5ccdfafbe1b44debfea75 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -465,6 +465,11 @@ namespace llvm {
      /// expanded to fmul + fadd.
      virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
  
+    /// createFastISel - This method returns a target-specific FastISel object,
+    /// or null if the target does not support "fast" instruction selection.
+    virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+                                     const TargetLibraryInfo *LibInfo) const;
+
    private:
      SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
      SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -624,6 +629,11 @@ namespace llvm {
      SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
    };
  
+  namespace PPC {
+    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+                             const TargetLibraryInfo *LibInfo);
+  }
+
    bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Tue, 30 Jul 2013 00:50:39 +0000 (00:50 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Tue, 30 Jul 2013 00:50:39 +0000 (00:50 +0000)
lib/Target/PowerPC/CMakeLists.txt		patch \| blob \| history
lib/Target/PowerPC/Makefile		patch \| blob \| history
lib/Target/PowerPC/PPCFastISel.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history