Renamed CCState members that appear to misspell 'Processed' as 'Proceed'. NFC.

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 97b62264462d6a1b53f532e01d5e9f4736963c18..eb444d57cffae2810c48da73e059dd253e5abc46 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -164,7 +164,7 @@ static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
    return new ARMElfTargetObjectFile();
  }
  
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
      : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
    Subtarget = &TM.getSubtarget<ARMSubtarget>();
    RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
@@ -881,8 +881,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
      }
  
-    // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
-    if (!Subtarget->hasV8Ops()) {
+    // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
+    if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
        setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
        setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
      }
@@ -898,7 +898,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    if (Subtarget->hasSinCos()) {
      setLibcallName(RTLIB::SINCOS_F32, "sincosf");
      setLibcallName(RTLIB::SINCOS_F64, "sincos");
-    if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+    if (Subtarget->getTargetTriple().isiOS()) {
        // For iOS, we don't want to the normal expansion of a libcall to
        // sincos. We want to issue a libcall to __sincos_stret.
        setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -906,16 +906,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      }
    }
  
-  // ARMv8 implements a lot of rounding-like FP operations.
-  if (Subtarget->hasV8Ops()) {
-    static MVT RoundingTypes[] = {MVT::f32, MVT::f64};
-    for (const auto Ty : RoundingTypes) {
-      setOperationAction(ISD::FFLOOR, Ty, Legal);
-      setOperationAction(ISD::FCEIL, Ty, Legal);
-      setOperationAction(ISD::FROUND, Ty, Legal);
-      setOperationAction(ISD::FTRUNC, Ty, Legal);
-      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
-      setOperationAction(ISD::FRINT, Ty, Legal);
+  // FP-ARMv8 implements a lot of rounding-like FP operations.
+  if (Subtarget->hasFPARMv8()) {
+    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+    setOperationAction(ISD::FROUND, MVT::f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+    setOperationAction(ISD::FRINT, MVT::f32, Legal);
+    if (!Subtarget->isFPOnlySP()) {
+      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+      setOperationAction(ISD::FROUND, MVT::f64, Legal);
+      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+      setOperationAction(ISD::FRINT, MVT::f64, Legal);
      }
    }
    // We have target-specific dag combine patterns for the following nodes:
@@ -1585,7 +1590,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
        // True if this byval aggregate will be split between registers
        // and memory.
        unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
-      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
  
        if (CurByValIdx < ByValArgsCount) {
  
@@ -3061,7 +3066,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
          if (Flags.isByVal()) {
            unsigned ExtraArgRegsSize;
            unsigned ExtraArgRegsSaveSize;
-          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
                           Flags.getByValSize(),
                           ExtraArgRegsSize, ExtraArgRegsSaveSize);
  
@@ -3185,7 +3190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
            // Since they could be overwritten by lowering of arguments in case of
            // a tail call.
            if (Flags.isByVal()) {
-            unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
  
              ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
              int FrameIndex = StoreByValRegs(
@@ -3240,6 +3245,18 @@ static bool isFloatingPointZero(SDValue Op) {
          if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
            return CFP->getValueAPF().isPosZero();
      }
+  } else if (Op->getOpcode() == ISD::BITCAST &&
+             Op->getValueType(0) == MVT::f64) {
+    // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
+    // created by LowerConstantFP().
+    SDValue BitcastOp = Op->getOperand(0);
+    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
+      SDValue MoveOp = BitcastOp->getOperand(0);
+      if (MoveOp->getOpcode() == ISD::TargetConstant &&
+          cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
+        return true;
+      }
+    }
    }
    return false;
  }
@@ -3615,12 +3632,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
      //   select c, a, b
      // We only do this in unsafe-fp-math, because signed zeros and NaNs are
      // handled differently than the original code sequence.
-    if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
-        RHS == FalseVal) {
-      if (CC == ISD::SETOGT || CC == ISD::SETUGT)
-        return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
-      if (CC == ISD::SETOLT || CC == ISD::SETULT)
-        return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+    if (getTargetMachine().Options.UnsafeFPMath) {
+      if (LHS == TrueVal && RHS == FalseVal) {
+        if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+          return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+        if (CC == ISD::SETOLT || CC == ISD::SETULT)
+          return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+      } else if (LHS == FalseVal && RHS == TrueVal) {
+        if (CC == ISD::SETOLT || CC == ISD::SETULT)
+          return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+        if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+          return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+      }
      }
  
      bool swpCmpOps = false;
@@ -6566,9 +6589,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
                     .addReg(NewVReg2, RegState::Kill)
                     .addReg(NewVReg3, RegState::Kill));
      unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
-    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
-                   .addFrameIndex(FI)
-                   .addImm(36)); // &jbuf[1] :: pc
+    BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
+            .addFrameIndex(FI)
+            .addImm(36); // &jbuf[1] :: pc
      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
                     .addReg(NewVReg4, RegState::Kill)
                     .addReg(NewVReg5, RegState::Kill)
@@ -7668,12 +7691,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  
  void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                        SDNode *Node) const {
-  if (!MI->hasPostISelHook()) {
-    assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
-           "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
-    return;
-  }
-
    const MCInstrDesc *MCID = &MI->getDesc();
    // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
    // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -11024,11 +11041,11 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
  }
  
  // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                           AtomicOrdering Ord, bool IsStore,
                                           bool IsLoad) const {
    if (!getInsertFencesForAtomic())
-    return;
+    return nullptr;
  
    switch (Ord) {
    case NotAtomic:
@@ -11036,27 +11053,27 @@ void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
      llvm_unreachable("Invalid fence: unordered/non-atomic");
    case Monotonic:
    case Acquire:
-    return; // Nothing to do
+    return nullptr; // Nothing to do
    case SequentiallyConsistent:
      if (!IsStore)
-      return; // Nothing to do
-              /*FALLTHROUGH*/
+      return nullptr; // Nothing to do
+    /*FALLTHROUGH*/
    case Release:
    case AcquireRelease:
      if (Subtarget->isSwift())
-      makeDMB(Builder, ARM_MB::ISHST);
+      return makeDMB(Builder, ARM_MB::ISHST);
      // FIXME: add a comment with a link to documentation justifying this.
      else
-      makeDMB(Builder, ARM_MB::ISH);
-    return;
+      return makeDMB(Builder, ARM_MB::ISH);
    }
+  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
  }
  
-void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                            AtomicOrdering Ord, bool IsStore,
                                            bool IsLoad) const {
    if (!getInsertFencesForAtomic())
-    return;
+    return nullptr;
  
    switch (Ord) {
    case NotAtomic:
@@ -11064,13 +11081,13 @@ void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
      llvm_unreachable("Invalid fence: unordered/not-atomic");
    case Monotonic:
    case Release:
-    return; // Nothing to do
+    return nullptr; // Nothing to do
    case Acquire:
    case AcquireRelease:
-    case SequentiallyConsistent:
-    makeDMB(Builder, ARM_MB::ISH);
-    return;
+  case SequentiallyConsistent:
+    return makeDMB(Builder, ARM_MB::ISH);
    }
+  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
  }
  
  // Loads and stores less than 64-bits are already atomic; ones above that
@@ -11106,6 +11123,35 @@ bool ARMTargetLowering::useLoadStackGuardNode() const {
    return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
  }
  
+bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                                  unsigned &Cost) const {
+  // If we do not have NEON, vector types are not natively supported.
+  if (!Subtarget->hasNEON())
+    return false;
+
+  // Floating point values and vector values map to the same register file.
+  // Therefore, althought we could do a store extract of a vector type, this is
+  // better to leave at float as we have more freedom in the addressing mode for
+  // those.
+  if (VectorTy->isFPOrFPVectorTy())
+    return false;
+
+  // If the index is unknown at compile time, this is very expensive to lower
+  // and it is not possible to combine the store with the extract.
+  if (!isa<ConstantInt>(Idx))
+    return false;
+
+  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  // We can do a store + vector extract on any vector that fits perfectly in a D
+  // or Q register.
+  if (BitWidth == 64 || BitWidth == 128) {
+    Cost = 0;
+    return true;
+  }
+  return false;
+}
+
  Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                           AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();