Invert the TargetLowering flag that controls divide by constant expansion.

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 8490adb3c333a6568e2fd79716b637d882e23e04..c9ed2bae1e3a720a07596b9cbd58aaa1675fe5a0 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- PPC32ISelLowering.cpp - PPC32 DAG Lowering Implementation ---------===//
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,30 +7,35 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// This file implements the PPC32ISelLowering class.
+// This file implements the PPCISelLowering class.
  //
  //===----------------------------------------------------------------------===//
  
-#include "PPC32ISelLowering.h"
-#include "PPC32TargetMachine.h"
+#include "PPCISelLowering.h"
+#include "PPCTargetMachine.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
  #include "llvm/Constants.h"
  #include "llvm/Function.h"
  using namespace llvm;
  
-PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
+PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
    : TargetLowering(TM) {
      
    // Fold away setcc operations if possible.
    setSetCCIsExpensive();
+  setPow2DivIsCheap();
    
+  // Use _setjmp/_longjmp instead of setjmp/longjmp.
+  setUseUnderscoreSetJmpLongJmp(true);
+    
    // Set up the register classes.
-  addRegisterClass(MVT::i32, PPC32::GPRCRegisterClass);
-  addRegisterClass(MVT::f32, PPC32::FPRCRegisterClass);
-  addRegisterClass(MVT::f64, PPC32::FPRCRegisterClass);
+  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
    
    // PowerPC has no intrinsics for these particular operations
    setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
@@ -48,13 +53,13 @@ PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
    // We don't support sin/cos/sqrt/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
-  setOperationAction(ISD::SREM , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
-  setOperationAction(ISD::SREM , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
    
    // If we're enabling GP optimizations, use hardware square root
-  if (!TM.getSubtarget<PPCSubtarget>().isGigaProcessor()) {
+  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
      setOperationAction(ISD::FSQRT, MVT::f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::f32, Expand);
    }
@@ -72,11 +77,6 @@ PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
    setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
    
-  // PowerPC wants to expand i64 shifts itself.
-  setOperationAction(ISD::SHL, MVT::i64, Custom);
-  setOperationAction(ISD::SRL, MVT::i64, Custom);
-  setOperationAction(ISD::SRA, MVT::i64, Custom);
-
    // PowerPC does not have BRCOND* which requires SetCC
    setOperationAction(ISD::BRCOND,       MVT::Other, Expand);
    setOperationAction(ISD::BRCONDTWOWAY, MVT::Other, Expand);
@@ -86,11 +86,32 @@ PPC32TargetLowering::PPC32TargetLowering(TargetMachine &TM)
    
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
-  
+
    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  
+  // PowerPC does not have truncstore for i1.
+  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
+  
+  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
+    // They also have instructions for converting between i64 and fp.
+    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+  }
+
+  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
+    // 64 bit PowerPC implementations can support i64 types directly
+    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+  } else {
+    // 32 bit PowerPC wants to expand i64 shifts itself.
+    setOperationAction(ISD::SHL, MVT::i64, Custom);
+    setOperationAction(ISD::SRL, MVT::i64, Custom);
+    setOperationAction(ISD::SRA, MVT::i64, Custom);
+  }
+  
    setSetCCResultContents(ZeroOrOneSetCCResult);
    
    computeRegisterProperties();
@@ -111,21 +132,51 @@ static bool isFloatingPointZero(SDOperand Op) {
  
  /// LowerOperation - Provide custom lowering hooks for some operations.
  ///
-SDOperand PPC32TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
    switch (Op.getOpcode()) {
    default: assert(0 && "Wasn't expecting to be able to lower this!"); 
    case ISD::FP_TO_SINT: {
-    assert(Op.getValueType() == MVT::i32 &&
-           MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
-    Op = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Op.getOperand(0));
+    assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
+    SDOperand Src = Op.getOperand(0);
+    if (Src.getValueType() == MVT::f32)
+      Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
+    
+    switch (Op.getValueType()) {
+    default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+    case MVT::i32:
+      Op = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
+      break;
+    case MVT::i64:
+      Op = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
+      break;
+    }
     
      int FrameIdx =
        DAG.getMachineFunction().getFrameInfo()->CreateStackObject(8, 8);
      SDOperand FI = DAG.getFrameIndex(FrameIdx, MVT::i32);
      SDOperand ST = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
                                 Op, FI, DAG.getSrcValue(0));
-    FI = DAG.getNode(ISD::ADD, MVT::i32, FI, DAG.getConstant(4, MVT::i32));
-    return DAG.getLoad(MVT::i32, ST, FI, DAG.getSrcValue(0));
+    if (Op.getOpcode() == PPCISD::FCTIDZ) {
+      Op = DAG.getLoad(MVT::i64, ST, FI, DAG.getSrcValue(0));
+    } else {
+      FI = DAG.getNode(ISD::ADD, MVT::i32, FI, DAG.getConstant(4, MVT::i32));
+      Op = DAG.getLoad(MVT::i32, ST, FI, DAG.getSrcValue(0));
+    }
+    return Op;
+  }
+  case ISD::SINT_TO_FP: {
+    assert(MVT::i64 == Op.getOperand(0).getValueType() && 
+           "Unhandled SINT_TO_FP type in custom expander!");
+    int FrameIdx =
+      DAG.getMachineFunction().getFrameInfo()->CreateStackObject(8, 8);
+    SDOperand FI = DAG.getFrameIndex(FrameIdx, MVT::i32);
+    SDOperand ST = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
+                               Op.getOperand(0), FI, DAG.getSrcValue(0));
+    SDOperand LD = DAG.getLoad(MVT::f64, ST, FI, DAG.getSrcValue(0));
+    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, LD);
+    if (MVT::f32 == Op.getValueType())
+      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+    return FP;
    }
    case ISD::SELECT_CC: {
      // Turn FP only select_cc's into fsel instructions.
@@ -168,19 +219,19 @@ SDOperand PPC32TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
      case ISD::SETULT:
      case ISD::SETLT:
        return DAG.getNode(PPCISD::FSEL, ResVT,
-                         DAG.getNode(ISD::SUB, CmpVT, LHS, RHS), FV, TV);
+                         DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS), FV, TV);
      case ISD::SETUGE:
      case ISD::SETGE:
        return DAG.getNode(PPCISD::FSEL, ResVT,
-                         DAG.getNode(ISD::SUB, CmpVT, LHS, RHS), TV, FV);
+                         DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS), TV, FV);
      case ISD::SETUGT:
      case ISD::SETGT:
        return DAG.getNode(PPCISD::FSEL, ResVT,
-                         DAG.getNode(ISD::SUB, CmpVT, RHS, LHS), FV, TV);
+                         DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS), FV, TV);
      case ISD::SETULE:
      case ISD::SETLE:
        return DAG.getNode(PPCISD::FSEL, ResVT,
-                         DAG.getNode(ISD::SUB, CmpVT, RHS, LHS), TV, FV);
+                         DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS), TV, FV);
      }
      break;
    }
@@ -267,19 +318,16 @@ SDOperand PPC32TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  }
  
  std::vector<SDOperand>
-PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
    //
    // add beautiful description of PPC stack frame format, or at least some docs
    //
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MachineBasicBlock& BB = MF.front();
+  SSARegMap *RegMap = MF.getSSARegMap();
    std::vector<SDOperand> ArgValues;
    
-  // Due to the rather complicated nature of the PowerPC ABI, rather than a
-  // fixed size array of physical args, for the sake of simplicity let the STL
-  // handle tracking them for us.
-  std::vector<unsigned> argVR, argPR, argOp;
    unsigned ArgOffset = 24;
    unsigned GPR_remaining = 8;
    unsigned FPR_remaining = 13;
@@ -312,9 +360,9 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
        ObjSize = 4;
        if (!ArgLive) break;
        if (GPR_remaining > 0) {
-        MF.addLiveIn(GPR[GPR_idx]);
-        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(),
-                                            GPR[GPR_idx], MVT::i32);
+        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+        MF.addLiveIn(GPR[GPR_idx], VReg);
+        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          if (ObjectVT != MVT::i32) {
            unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext 
                                                         : ISD::AssertZext;
@@ -330,15 +378,17 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
        if (!ArgLive) break;
        if (GPR_remaining > 0) {
          SDOperand argHi, argLo;
-        MF.addLiveIn(GPR[GPR_idx]);
-        argHi = DAG.getCopyFromReg(DAG.getRoot(), GPR[GPR_idx], MVT::i32);
+        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+        MF.addLiveIn(GPR[GPR_idx], VReg);
+        argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          // If we have two or more remaining argument registers, then both halves
          // of the i64 can be sourced from there.  Otherwise, the lower half will
          // have to come off the stack.  This can happen when an i64 is preceded
          // by 28 bytes of arguments.
          if (GPR_remaining > 1) {
-          MF.addLiveIn(GPR[GPR_idx+1]);
-          argLo = DAG.getCopyFromReg(argHi, GPR[GPR_idx+1], MVT::i32);
+          unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+          MF.addLiveIn(GPR[GPR_idx+1], VReg);
+          argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
          } else {
            int FI = MFI->CreateFixedObject(4, ArgOffset+4);
            SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
@@ -357,9 +407,13 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
        ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
        if (!ArgLive) break;
        if (FPR_remaining > 0) {
-        MF.addLiveIn(FPR[FPR_idx]);
-        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), 
-                                            FPR[FPR_idx], ObjectVT);
+        unsigned VReg;
+        if (ObjectVT == MVT::f32)
+          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
+        else
+          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
+        MF.addLiveIn(FPR[FPR_idx], VReg);
+        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
          --FPR_remaining;
          ++FPR_idx;
        } else {
@@ -406,8 +460,9 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
      // result of va_next.
      std::vector<SDOperand> MemOps;
      for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
-      MF.addLiveIn(GPR[GPR_idx]);
-      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), GPR[GPR_idx], MVT::i32);
+      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+      MF.addLiveIn(GPR[GPR_idx], VReg);
+      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
        SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                      Val, FIN, DAG.getSrcValue(NULL));
        MemOps.push_back(Store);
@@ -442,11 +497,11 @@ PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  }
  
  std::pair<SDOperand, SDOperand>
-PPC32TargetLowering::LowerCallTo(SDOperand Chain,
-                                 const Type *RetTy, bool isVarArg,
-                                 unsigned CallingConv, bool isTailCall,
-                                 SDOperand Callee, ArgListTy &Args,
-                                 SelectionDAG &DAG) {
+PPCTargetLowering::LowerCallTo(SDOperand Chain,
+                               const Type *RetTy, bool isVarArg,
+                               unsigned CallingConv, bool isTailCall,
+                               SDOperand Callee, ArgListTy &Args,
+                               SelectionDAG &DAG) {
    // args_to_use will accumulate outgoing args for the ISD::CALL case in
    // SelectExpr to use to put the arguments in the appropriate registers.
    std::vector<SDOperand> args_to_use;
@@ -639,8 +694,21 @@ PPC32TargetLowering::LowerCallTo(SDOperand Chain,
    return std::make_pair(RetVal, Chain);
  }
  
-SDOperand PPC32TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
-                                            Value *VAListV, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerReturnTo(SDOperand Chain, SDOperand Op,
+                                           SelectionDAG &DAG) {
+  if (Op.getValueType() == MVT::i64) {
+    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op, 
+                               DAG.getConstant(1, MVT::i32));
+    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op,
+                               DAG.getConstant(0, MVT::i32));
+    return DAG.getNode(ISD::RET, MVT::Other, Chain, Lo, Hi);
+  } else {
+    return DAG.getNode(ISD::RET, MVT::Other, Chain, Op);
+  }
+}
+
+SDOperand PPCTargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
+                                          Value *VAListV, SelectionDAG &DAG) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
@@ -649,9 +717,9 @@ SDOperand PPC32TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
  }
  
  std::pair<SDOperand,SDOperand>
-PPC32TargetLowering::LowerVAArg(SDOperand Chain,
-                                SDOperand VAListP, Value *VAListV,
-                                const Type *ArgTy, SelectionDAG &DAG) {
+PPCTargetLowering::LowerVAArg(SDOperand Chain,
+                              SDOperand VAListP, Value *VAListV,
+                              const Type *ArgTy, SelectionDAG &DAG) {
    MVT::ValueType ArgVT = getValueType(ArgTy);
    
    SDOperand VAList =
@@ -673,7 +741,7 @@ PPC32TargetLowering::LowerVAArg(SDOperand Chain,
  }
  
  
-std::pair<SDOperand, SDOperand> PPC32TargetLowering::
+std::pair<SDOperand, SDOperand> PPCTargetLowering::
  LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                          SelectionDAG &DAG) {
    assert(0 && "LowerFrameReturnAddress unimplemented");
@@ -681,10 +749,11 @@ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
  }
  
  MachineBasicBlock *
-PPC32TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
-                                             MachineBasicBlock *BB) {
+PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                           MachineBasicBlock *BB) {
    assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
-          MI->getOpcode() == PPC::SELECT_CC_FP) &&
+          MI->getOpcode() == PPC::SELECT_CC_F4 ||
+          MI->getOpcode() == PPC::SELECT_CC_F8) &&
           "Unexpected instr type to insert");
    
    // To "insert" a SELECT_CC instruction, we actually have to insert the diamond