Remove getSubtargetImpl from AArch64ISelLowering and cache the subtarget passed to the constructor

[oota-llvm.git] / lib / Target / AArch64 / AArch64ISelLowering.cpp
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 70cf7599a395d1cc737e9d7544c5adab4dc8124f..64f6e5657683599ef5bf82b2bea4d154c67de269 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,6 +12,7 @@
  //===----------------------------------------------------------------------===//
  
  #include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
  #include "AArch64MachineFunctionInfo.h"
  #include "AArch64PerfectShuffle.h"
  #include "AArch64Subtarget.h"
@@ -66,10 +67,9 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
                           cl::desc("Allow AArch64 SLI/SRI formation"),
                           cl::init(false));
  
-
-AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
-    : TargetLowering(TM) {
-  Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
+                                             const AArch64Subtarget &STI)
+    : TargetLowering(TM), Subtarget(&STI) {
  
    // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
    // we have to make something up. Arbitrarily, choose ZeroOrOne.
@@ -386,13 +386,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
  
+  // Make floating-point constants legal for the large code model, so they don't
+  // become loads from the constant pool.
+  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  }
+
    // AArch64 does not have floating-point extending loads, i1 sign-extending
    // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
-  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+  for (MVT VT : MVT::fp_valuetypes()) {
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+  }
+  for (MVT VT : MVT::integer_valuetypes())
+    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
+
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -531,26 +542,22 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
      setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
      // Likewise, narrowing and extending vector loads/stores aren't handled
      // directly.
-    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-
-      setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
-                         Expand);
-
-      setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
-      setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
-      setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
-      setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
-
-      setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
-
-      for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-           InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
-        setTruncStoreAction((MVT::SimpleValueType)VT,
-                            (MVT::SimpleValueType)InnerVT, Expand);
-      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
-      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
-      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    for (MVT VT : MVT::vector_valuetypes()) {
+      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+      setOperationAction(ISD::MULHS, VT, Expand);
+      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+      setOperationAction(ISD::MULHU, VT, Expand);
+      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+      setOperationAction(ISD::BSWAP, VT, Expand);
+
+      for (MVT InnerVT : MVT::vector_valuetypes()) {
+        setTruncStoreAction(VT, InnerVT, Expand);
+        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+      }
      }
  
      // AArch64 has implementations of a lot of rounding-like FP operations.
@@ -615,7 +622,8 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
    setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
-  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+  for (MVT InnerVT : MVT::all_valuetypes())
+    setLoadExtAction(ISD::EXTLOAD, InnerVT, VT.getSimpleVT(), Expand);
  
    // CNT supports only B element sizes.
    if (VT != MVT::v8i8 && VT != MVT::v16i8)
@@ -869,9 +877,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
    // EndBB:
    //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
  
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    DebugLoc DL = MI->getDebugLoc();
    MachineFunction::iterator It = MBB;
@@ -1981,6 +1988,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
      llvm_unreachable("Unsupported calling convention.");
    case CallingConv::WebKit_JS:
      return CC_AArch64_WebKit_JS;
+  case CallingConv::GHC:
+    return CC_AArch64_GHC;
    case CallingConv::C:
    case CallingConv::Fast:
      if (!Subtarget->isTargetDarwin())
@@ -2106,7 +2115,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
        unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
  
        uint32_t BEAlign = 0;
-      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
+          !Ins[i].Flags.isInConsecutiveRegs())
          BEAlign = 8 - ArgSize;
  
        int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
@@ -2349,7 +2359,9 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
    // cannot rely on the linker replacing the tail call with a return.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
-    if (GV->hasExternalWeakLinkage())
+    const Triple TT(getTargetMachine().getTargetTriple());
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
        return false;
    }
  
@@ -2660,7 +2672,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
        unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
                                          : VA.getValVT().getSizeInBits();
        OpSize = (OpSize + 7) / 8;
-      if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+      if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
+          !Flags.isInConsecutiveRegs()) {
          if (OpSize < 8)
            BEAlign = 8 - OpSize;
        }
@@ -2782,19 +2795,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
  
    // Add a register mask operand representing the call-preserved registers.
    const uint32_t *Mask;
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
    if (IsThisReturn) {
      // For 'this' returns, use the X0-preserving mask if applicable
-    Mask = ARI->getThisReturnPreservedMask(CallConv);
+    Mask = TRI->getThisReturnPreservedMask(CallConv);
      if (!Mask) {
        IsThisReturn = false;
-      Mask = ARI->getCallPreservedMask(CallConv);
+      Mask = TRI->getCallPreservedMask(CallConv);
      }
    } else
-    Mask = ARI->getCallPreservedMask(CallConv);
+    Mask = TRI->getCallPreservedMask(CallConv);
  
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3014,11 +3024,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
    // TLS calls preserve all registers except those that absolutely must be
    // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
    // silly).
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
-  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+  const uint32_t *Mask =
+      Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
  
    // Finally, we can make the call. This is just a degenerate version of a
    // normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -3065,11 +3072,8 @@ SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
    // TLS calls preserve all registers except those that absolutely must be
    // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
    // silly).
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
-  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+  const uint32_t *Mask =
+      Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
  
    // The function takes only one argument: the address of the descriptor itself
    // in X0.
@@ -4615,19 +4619,21 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
        // The extraction can just take the second half
        Src.ShuffleVec =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumSrcElts));
+                      DAG.getConstant(NumSrcElts, MVT::i64));
        Src.WindowBase = -NumSrcElts;
      } else if (Src.MaxElt < NumSrcElts) {
        // The extraction can just take the first half
-      Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
-                                   Src.ShuffleVec, DAG.getIntPtrConstant(0));
+      Src.ShuffleVec =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, MVT::i64));
      } else {
        // An actual VEXT is needed
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
-                                     Src.ShuffleVec, DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc1 =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, MVT::i64));
        SDValue VEXTSrc2 =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumSrcElts));
+                      DAG.getConstant(NumSrcElts, MVT::i64));
        unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
  
        Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
@@ -6948,7 +6954,8 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
    return SDValue();
  }
  
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+                                     const AArch64Subtarget *Subtarget) {
    // First try to optimize away the conversion when it's conditionally from
    // a constant. Vectors only.
    SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -6967,7 +6974,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
    // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead.
    // This eliminates an "integer-to-vector-move UOP and improve throughput.
    SDValue N0 = N->getOperand(0);
-  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -7756,9 +7763,9 @@ static SDValue performExtendCombine(SDNode *N,
    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
                                 LoVT.getVectorNumElements());
    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(0));
+                   DAG.getConstant(0, MVT::i64));
    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+                   DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
  
@@ -7839,7 +7846,7 @@ static SDValue performSTORECombine(SDNode *N,
      return SDValue();
  
    // Cyclone has bad performance on unaligned 16B stores when crossing line and
-  // page boundries. We want to split such stores.
+  // page boundaries. We want to split such stores.
    if (!Subtarget->isCyclone())
      return SDValue();
  
@@ -7880,9 +7887,9 @@ static SDValue performSTORECombine(SDNode *N,
    EVT HalfVT =
        EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
    SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(0));
+                                   DAG.getConstant(0, MVT::i64));
    SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(NumElts));
+                                   DAG.getConstant(NumElts, MVT::i64));
    SDValue BasePtr = S->getBasePtr();
    SDValue NewST1 =
        DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
@@ -8478,6 +8485,12 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
    // largest real NEON comparison is 64-bits per lane, which means the result is
    // at most 32-bits and an illegal vector. Just bail out for now.
    EVT SrcVT = N0.getOperand(0).getValueType();
+
+  // Don't try to do this optimization when the setcc itself has i1 operands.
+  // There are no legal vectors of i1, so this would be pointless.
+  if (SrcVT == MVT::i1)
+    return SDValue();
+
    int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
    if (!ResVT.isVector() || NumMaskElts == 0)
      return SDValue();
@@ -8518,7 +8531,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
      return performMulCombine(N, DAG, DCI, Subtarget);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
-    return performIntToFpCombine(N, DAG);
+    return performIntToFpCombine(N, DAG, Subtarget);
    case ISD::OR:
      return performORCombine(N, DCI, Subtarget);
    case ISD::INTRINSIC_WO_CHAIN:
@@ -8696,13 +8709,12 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
  
  static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                    SelectionDAG &DAG) {
-  if (N->getValueType(0) != MVT::i16)
-    return;
-
    SDLoc DL(N);
    SDValue Op = N->getOperand(0);
-  assert(Op.getValueType() == MVT::f16 &&
-         "Inconsistent bitcast? Only 16-bit types should be i16 or f16");
+
+  if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
+    return;
+
    Op = SDValue(
        DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                           DAG.getUNDEF(MVT::i32), Op,
@@ -8842,3 +8854,8 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
                  Val, Stxr->getFunctionType()->getParamType(0)),
        Addr);
  }
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+    Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+  return Ty->isArrayTy(); // CallConv and isVarArg are not consulted.
+}