Fix return sequence on armv4 thumb

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index f9fc641b12d61c0620e22e1bc3dec98b89cc4132..3e53af42b897d74d455dfab0cb8b0d18d39517b5 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -166,8 +166,8 @@ static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
  ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
    Subtarget = &TM.getSubtarget<ARMSubtarget>();
-  RegInfo = TM.getRegisterInfo();
-  Itins = TM.getInstrItineraryData();
+  RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
+  Itins = TM.getSubtargetImpl()->getInstrItineraryData();
  
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  
@@ -312,6 +312,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        // Conversions between floating types.
        // RTABI chapter 4.1.2, Table 7
        { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
  
        // Integer to floating-point conversions.
@@ -396,8 +397,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      addRegisterClass(MVT::f32, &ARM::SPRRegClass);
      if (!Subtarget->isFPOnlySP())
        addRegisterClass(MVT::f64, &ARM::DPRRegClass);
-
-    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    }
  
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -582,8 +581,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  
    computeRegisterProperties();
  
-  // ARM does not have f32 extending load.
+  // ARM does not have floating-point extending loads.
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+  // ... or floating-point truncating stores.
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  
    // ARM does not have i1 sign extending load.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -825,10 +830,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
        setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
      }
-    // Special handling for half-precision FP.
+
+    // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
+    if (!Subtarget->hasV8Ops()) {
+      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+    }
+
+    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
      if (!Subtarget->hasFP16()) {
-      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
-      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
      }
    }
  
@@ -1119,7 +1131,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  
    // Load are scheduled for latency even if there instruction itinerary
    // is not available.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  
    if (MCID.getNumDefs() == 0)
@@ -1654,6 +1667,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
        assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
        Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
                             DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+    } else if (Subtarget->isTargetCOFF()) {
+      assert(Subtarget->isTargetWindows() &&
+             "Windows is the only supported COFF target");
+      unsigned TargetFlags = GV->hasDLLImportStorageClass()
+                                 ? ARMII::MO_DLLIMPORT
+                                 : ARMII::MO_NO_FLAG;
+      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
+                                          TargetFlags);
+      if (GV->hasDLLImportStorageClass())
+        Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                             DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
+                                         Callee), MachinePointerInfo::getGOT(),
+                             false, false, false, 0);
      } else {
        // On ELF targets for PIC code, direct calls should go through the PLT
        unsigned OpFlags = 0;
@@ -1695,7 +1721,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
  
    // FIXME: handle tail calls differently.
    unsigned CallOpc;
-  bool HasMinSizeAttr = Subtarget->isMinSize();
+  bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
+      AttributeSet::FunctionIndex, Attribute::MinSize);
    if (Subtarget->isThumb()) {
      if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
        CallOpc = ARMISD::CALL_NOLINK;
@@ -1726,7 +1753,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    // Add a register mask operand representing the call-preserved registers.
    if (!isTailCall) {
      const uint32_t *Mask;
-    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+    const TargetRegisterInfo *TRI =
+        getTargetMachine().getSubtargetImpl()->getRegisterInfo();
      const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
      if (isThisReturn) {
        // For 'this' returns, use the R0-preserving mask if applicable
@@ -1981,7 +2009,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
        // the caller's fixed stack objects.
        MachineFrameInfo *MFI = MF.getFrameInfo();
        const MachineRegisterInfo *MRI = &MF.getRegInfo();
-      const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+      const TargetInstrInfo *TII =
+          getTargetMachine().getSubtargetImpl()->getInstrInfo();
        for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
             i != e;
             ++i, ++realArgIdx) {
@@ -2084,6 +2113,10 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
    RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
    bool isLittleEndian = Subtarget->isLittle();
  
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  AFI->setReturnRegsCount(RVLocs.size());
+
    // Copy the result values into the output registers.
    for (unsigned i = 0, realRVLocIdx = 0;
         i != RVLocs.size();
@@ -2333,7 +2366,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl).setChain(Chain)
      .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
-               DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0);
+               DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
+               0);
  
    std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
    return CallResult.first;
@@ -2441,7 +2475,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
  
    // If we have T2 ops, we can materialize the address directly via movt/movw
    // pair. This is always cheaper.
-  if (Subtarget->useMovt()) {
+  if (Subtarget->useMovt(DAG.getMachineFunction())) {
      ++NumMovwMovt;
      // FIXME: Once remat is capable of dealing with instructions with register
      // operands, expand this into two nodes.
@@ -2463,7 +2497,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  
-  if (Subtarget->useMovt())
+  if (Subtarget->useMovt(DAG.getMachineFunction()))
      ++NumMovwMovt;
  
    // FIXME: Once remat is capable of dealing with instructions with register
@@ -2483,18 +2517,27 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
  SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                       SelectionDAG &DAG) const {
    assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
+  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+         "Windows on ARM expects to use movw/movt");
  
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  const ARMII::TOF TargetFlags =
+    (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
    EVT PtrVT = getPointerTy();
+  SDValue Result;
    SDLoc DL(Op);
  
    ++NumMovwMovt;
  
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
-  return DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
-                     DAG.getTargetGlobalAddress(GV, DL, PtrVT));
+  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+                                                  TargetFlags));
+  if (GV->hasDLLImportStorageClass())
+    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+                         MachinePointerInfo::getGOT(), false, false, false, 0);
+  return Result;
  }
  
  SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
@@ -2542,6 +2585,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
    SDLoc dl(Op);
    switch (IntNo) {
    default: return SDValue();    // Don't custom lower most intrinsics.
+  case Intrinsic::arm_rbit: {
+    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+           "RBIT intrinsic must have i32 type!");
+    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+  }
    case Intrinsic::arm_thread_pointer: {
      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
      return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -2710,7 +2758,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
      NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
    }
  
-  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned Align = MF.getTarget()
+                       .getSubtargetImpl()
+                       ->getFrameLowering()
+                       ->getStackAlignment();
    ArgRegsSize = NumGPRs * 4;
  
    // If parameter is split between stack and GPRs...
@@ -4499,6 +4550,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
        BitMask <<= 8;
        ImmMask <<= 1;
      }
+
+    if (DAG.getTargetLoweringInfo().isBigEndian())
+      // Swap the higher and lower 32-bit words (the two nibbles of Imm).
+      Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+
      // Op=1, Cmode=1110.
      OpCmode = 0x1e;
      VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
@@ -5699,7 +5755,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
    // operation legalization where we can't create illegal types.
    return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
                          LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
-                        LD->getMemoryVT(), LD->isVolatile(),
+                        LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
                          LD->isNonTemporal(), LD->getAlignment());
  }
  
@@ -6085,7 +6141,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
      .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
-               &Args, 0)
+               std::move(Args), 0)
      .setDiscardResult();
  
    std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -6260,7 +6316,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
  void ARMTargetLowering::
  SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
                         MachineBasicBlock *DispatchBB, int FI) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    DebugLoc dl = MI->getDebugLoc();
    MachineFunction *MF = MBB->getParent();
    MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6375,7 +6432,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
  
  MachineBasicBlock *ARMTargetLowering::
  EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    DebugLoc dl = MI->getDebugLoc();
    MachineFunction *MF = MBB->getParent();
    MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6892,7 +6950,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
    // This pseudo instruction has 3 operands: dst, src, size
    // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
    // Otherwise, we will generate unrolled scalar copies.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = BB;
    ++It;
@@ -7126,7 +7185,7 @@ MachineBasicBlock *
  ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
                                         MachineBasicBlock *MBB) const {
    const TargetMachine &TM = getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();
  
    assert(Subtarget->isTargetWindows() &&
@@ -7142,7 +7201,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
    // thumb-2 environment, so there is no interworking required.  As a result, we
    // do not expect a veneer to be emitted by the linker, clobbering IP.
    //
-  // Each module recieves its own copy of __chkstk, so no import thunk is
+  // Each module receives its own copy of __chkstk, so no import thunk is
    // required, again, ensuring that IP is not clobbered.
    //
    // Finally, although some linkers may theoretically provide a trampoline for
@@ -7182,8 +7241,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
  
    AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
                                        ARM::SP)
-                              .addReg(ARM::SP, RegState::Define)
-                              .addReg(ARM::R4, RegState::Kill)));
+                              .addReg(ARM::SP).addReg(ARM::R4)));
  
    MI->eraseFromParent();
    return MBB;
@@ -7192,7 +7250,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
  MachineBasicBlock *
  ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    DebugLoc dl = MI->getDebugLoc();
    bool isThumb2 = Subtarget->isThumb2();
    switch (MI->getOpcode()) {
@@ -7462,8 +7521,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
    // Rename pseudo opcodes.
    unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
    if (NewOpc) {
-    const ARMBaseInstrInfo *TII =
-      static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+        getTargetMachine().getSubtargetImpl()->getInstrInfo());
      MCID = &TII->get(NewOpc);
  
      assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
@@ -8398,8 +8457,6 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
      if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
        std::swap (NewLD1, NewLD2);
      SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
-    DCI.RemoveFromWorklist(LD);
-    DAG.DeleteNode(LD);
      return Result;
    }
  
@@ -8562,7 +8619,7 @@ static SDValue PerformSTORECombine(SDNode *N,
    return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
                        St->getPointerInfo(), St->isVolatile(),
                        St->isNonTemporal(), St->getAlignment(),
-                      St->getTBAAInfo());
+                      St->getAAInfo());
  }
  
  /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
@@ -9652,8 +9709,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
    return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
  }
  
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned,
-                                                      bool *Fast) const {
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                       unsigned,
+                                                       unsigned,
+                                                       bool *Fast) const {
    // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
    bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
  
@@ -9707,11 +9766,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
      bool Fast;
      if (Size >= 16 &&
          (memOpAlign(SrcAlign, DstAlign, 16) ||
-         (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) {
+         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
        return MVT::v2f64;
      } else if (Size >= 8 &&
                 (memOpAlign(SrcAlign, DstAlign, 8) ||
-                (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) {
+                (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+                 Fast))) {
        return MVT::f64;
      }
    }
@@ -10554,7 +10614,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
    SDLoc dl(Op);
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl).setChain(InChain)
-    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
      .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
  
    std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -10577,7 +10637,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
    Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
    Flag = Chain.getValue(1);
  
-  SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
  
    SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
@@ -10760,6 +10820,11 @@ bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
    return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
  }
  
+// Using a load-stack-guard node has so far only been implemented for MachO.
+bool ARMTargetLowering::useLoadStackGuardNode() const {
+  return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
+}
+
  Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                           AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();