[AArch64] Support selecting STNP.

[oota-llvm.git] / lib / Target / AArch64 / AArch64ISelDAGToDAG.cpp
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

index 1ea4abcf05fa88821f31f993681341ccfdfd5c2d..77896af196d03f6b92c32a0041e88d1c22beaa16 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -53,9 +53,7 @@ public:
    }
  
    bool runOnMachineFunction(MachineFunction &MF) override {
-    ForCodeSize =
-        MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
-        MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+    ForCodeSize = MF.getFunction()->optForSize();
      Subtarget = &MF.getSubtarget<AArch64Subtarget>();
      return SelectionDAGISel::runOnMachineFunction(MF);
    }
@@ -79,6 +77,21 @@ public:
    bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
      return SelectShiftedRegister(N, true, Reg, Shift);
    }
+  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed7S(N, 1, Base, OffImm); // Size = 1 byte
+  }
+  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed7S(N, 2, Base, OffImm); // Size = 2 bytes
+  }
+  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed7S(N, 4, Base, OffImm); // Size = 4 bytes
+  }
+  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed7S(N, 8, Base, OffImm); // Size = 8 bytes
+  }
+  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed7S(N, 16, Base, OffImm); // Size = 16 bytes
+  }
    bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
      return SelectAddrModeIndexed(N, 1, Base, OffImm);
    }
@@ -155,6 +168,7 @@ public:
    SDNode *SelectBitfieldInsertOp(SDNode *N);
  
    SDNode *SelectLIBM(SDNode *N);
+  SDNode *SelectFPConvertWithRound(SDNode *N);
  
    SDNode *SelectReadRegister(SDNode *N);
    SDNode *SelectWriteRegister(SDNode *N);
@@ -165,6 +179,8 @@ public:
  private:
    bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                               SDValue &Shift);
+  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
+                               SDValue &OffImm);
    bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm);
    bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
@@ -185,6 +201,9 @@ private:
    }
  
    bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
+
+  SDNode *GenerateInexactFlagIfNeeded(const SDValue &In, unsigned InTyVariant,
+                                      SDLoc DL);
  };
  } // end anonymous namespace
  
@@ -604,16 +623,33 @@ static bool isWorthFoldingADDlow(SDValue N) {
    return true;
  }
  
+/// SelectAddrModeIndexed7S - Address selector for the "register plus scaled
+/// signed 7-bit immediate" mode.  This version ignores "Size" (the access size
+/// in bytes): it always succeeds, returning Base = N and a zero OffImm.
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
+                                                  SDValue &Base,
+                                                  SDValue &OffImm) {
+  SDLoc dl(N);
+  // Base only: the whole address N is used as the base with a zero immediate,
+  // so any offset is materialized into a register before the memory access:
+  //    add x0, Xbase, #offset
+  //    stp x1, x2, [x0]
+  Base = N;
+  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+  return true;
+}
+
  /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
  /// immediate" address.  The "Size" argument is the size in bytes of the memory
  /// reference, which determines the scale.
  bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
    SDLoc dl(N);
+  const DataLayout &DL = CurDAG->getDataLayout();
    const TargetLowering *TLI = getTargetLowering();
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
      return true;
    }
@@ -628,10 +664,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
  
      const GlobalValue *GV = GAN->getGlobal();
      unsigned Alignment = GV->getAlignment();
-    const DataLayout *DL = TLI->getDataLayout();
      Type *Ty = GV->getType()->getElementType();
      if (Alignment == 0 && Ty->isSized())
-      Alignment = DL->getABITypeAlignment(Ty);
+      Alignment = DL.getABITypeAlignment(Ty);
  
      if (Alignment >= Size)
        return true;
@@ -645,7 +680,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
@@ -688,7 +723,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          const TargetLowering *TLI = getTargetLowering();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+        Base = CurDAG->getTargetFrameIndex(
+            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
        }
        OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
        return true;
@@ -1033,6 +1069,8 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
        // it into an i64.
        DstVT = MVT::i32;
      }
+  } else if (VT == MVT::f16) {
+    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
    } else if (VT == MVT::f32) {
      Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
    } else if (VT == MVT::f64 || VT.is64BitVector()) {
@@ -1494,7 +1532,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
  }
  
  static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
-                                       unsigned &LSB, unsigned &MSB,
+                                       unsigned &Immr, unsigned &Imms,
                                         bool BiggerPattern) {
    assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
           "N must be a SHR/SRA operation to call this function");
@@ -1508,7 +1546,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
           "Type checking must have been done before calling this function");
  
    // Check for AND + SRL doing several bits extract.
-  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
+  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
      return true;
  
    // we're looking for a shift of a shift
@@ -1548,13 +1586,9 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
  
    assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
           "bad amount in shift node!");
-  // Note: The width operand is encoded as width-1.
-  unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
-  int sLSB = Srl_imm - Shl_imm;
-  if (sLSB < 0)
-    return false;
-  LSB = sLSB;
-  MSB = LSB + Width;
+  int immr = Srl_imm - Shl_imm;
+  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
+  Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;
    // SRA requires a signed extraction
    if (VT == MVT::i32)
      Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
@@ -1564,7 +1598,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
  }
  
  static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
-                                SDValue &Opd0, unsigned &LSB, unsigned &MSB,
+                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                  unsigned NumberOfIgnoredLowBits = 0,
                                  bool BiggerPattern = false) {
    if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
@@ -1576,11 +1610,11 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
        return false;
      break;
    case ISD::AND:
-    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
+    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                        NumberOfIgnoredLowBits, BiggerPattern);
    case ISD::SRL:
    case ISD::SRA:
-    return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
+    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
    }
  
    unsigned NOpc = N->getMachineOpcode();
@@ -1593,8 +1627,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
    case AArch64::UBFMXri:
      Opc = NOpc;
      Opd0 = N->getOperand(0);
-    LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
-    MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
      return true;
    }
    // Unreachable
@@ -1602,9 +1636,9 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
  }
  
  SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
-  unsigned Opc, LSB, MSB;
+  unsigned Opc, Immr, Imms;
    SDValue Opd0;
-  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
+  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
      return nullptr;
  
    EVT VT = N->getValueType(0);
@@ -1613,8 +1647,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
    // If the bit extract operation is 64bit but the original type is 32bit, we
    // need to add one EXTRACT_SUBREG.
    if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
-    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64),
-                       CurDAG->getTargetConstant(MSB, dl, MVT::i64)};
+    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
+                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
  
      SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
      SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
@@ -1624,8 +1658,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
      return Node;
    }
  
-  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT),
-                   CurDAG->getTargetConstant(MSB, dl, VT)};
+  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
+                   CurDAG->getTargetConstant(Imms, dl, VT)};
    return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  }
  
@@ -2019,11 +2053,29 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
    return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  }
  
+/// GenerateInexactFlagIfNeeded - On Darwin, unless UnsafeFPMath is set, create
+/// an FRINTX node on In (with an extra MVT::Glue result) and return it;
+/// otherwise return nullptr.  C11 leaves it implementation-defined whether
+/// round-to-integer operations trigger an inexact exception and IEEE says they
+/// don't.  Unfortunately, Darwin decided they do, so we sometimes have to
+/// insert a special instruction just to set the right bit in FPSR.
+SDNode *AArch64DAGToDAGISel::GenerateInexactFlagIfNeeded(const SDValue &In,
+                                                         unsigned InTyVariant,
+                                                         SDLoc DL) {
+  if (Subtarget->isTargetDarwin() && !TM.Options.UnsafeFPMath) {
+    // InTyVariant indexes the FRINTX opcode table below: 0 selects the 32-bit
+    // form (FRINTXSr) and 1 the 64-bit form (FRINTXDr).
+    unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
+    return CurDAG->getMachineNode(FRINTXOpcs[InTyVariant], DL,
+                                  In.getValueType(), MVT::Glue, In);
+  }
+  return nullptr;
+}
+
  SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
    EVT VT = N->getValueType(0);
    unsigned Variant;
    unsigned Opc;
-  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
  
    if (VT == MVT::f32) {
      Variant = 0;
@@ -2032,9 +2084,6 @@ SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
    } else
      return nullptr; // Unrecognized argument type. Fall back on default codegen.
  
-  // Pick the FRINTX variant needed to set the flags.
-  unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
    switch (N->getOpcode()) {
    default:
      return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
@@ -2065,14 +2114,97 @@ SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
    SmallVector<SDValue, 2> Ops;
    Ops.push_back(In);
  
-  if (!TM.Options.UnsafeFPMath) {
-    SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
-    Ops.push_back(SDValue(FRINTX, 1));
-  }
+  if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, Variant, dl))
+    Ops.push_back(SDValue(FRINTXNode, 1));
  
    return CurDAG->getMachineNode(Opc, dl, VT, Ops);
  }
  
+/// SelectFPConvertWithRound - Try to fold a single-use FCEIL/FFLOOR/FTRUNC/
+/// FROUND feeding an FP_TO_SINT/FP_TO_UINT into one FCVT{P,M,Z,A}{S,U} node.
+SDNode *AArch64DAGToDAGISel::SelectFPConvertWithRound(SDNode *N) {
+  SDNode *Op0 = N->getOperand(0).getNode();
+
+  // Return if the round op is used by other nodes, as this would result in two
+  // FRINTX, one each for round and convert.
+  if (!Op0->hasOneUse())
+    return nullptr;
+
+  unsigned InTyVariant; // 0: f32 source, 1: f64 source
+  EVT InTy = Op0->getValueType(0);
+  if (InTy == MVT::f32)
+    InTyVariant = 0;
+  else if (InTy == MVT::f64)
+    InTyVariant = 1;
+  else
+    return nullptr; // other source types are not folded here
+
+  unsigned OutTyVariant; // 0: i32 result, 1: i64 result
+  EVT OutTy = N->getValueType(0);
+  if (OutTy == MVT::i32)
+    OutTyVariant = 0;
+  else if (OutTy == MVT::i64)
+    OutTyVariant = 1;
+  else
+    return nullptr; // other result types are not folded here
+
+  assert((N->getOpcode() == ISD::FP_TO_SINT
+          || N->getOpcode() == ISD::FP_TO_UINT) && "Unexpected opcode!");
+  unsigned FpConVariant = N->getOpcode() == ISD::FP_TO_SINT ? 0 : 1;
+
+  unsigned Opc;
+  switch (Op0->getOpcode()) {
+  default:
+    return nullptr; // operand 0 is not one of the rounding nodes below
+  case ISD::FCEIL: {
+    unsigned FCVTPOpcs[2][2][2] = { // [FpCon][InTy][OutTy]
+        { { AArch64::FCVTPSUWSr, AArch64::FCVTPSUXSr },
+          { AArch64::FCVTPSUWDr, AArch64::FCVTPSUXDr } },
+        { { AArch64::FCVTPUUWSr, AArch64::FCVTPUUXSr },
+          { AArch64::FCVTPUUWDr, AArch64::FCVTPUUXDr } } };
+    Opc = FCVTPOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  case ISD::FFLOOR: {
+    unsigned FCVTMOpcs[2][2][2] = { // [FpCon][InTy][OutTy]
+        { { AArch64::FCVTMSUWSr, AArch64::FCVTMSUXSr },
+          { AArch64::FCVTMSUWDr, AArch64::FCVTMSUXDr } },
+        { { AArch64::FCVTMUUWSr, AArch64::FCVTMUUXSr },
+          { AArch64::FCVTMUUWDr, AArch64::FCVTMUUXDr } } };
+    Opc = FCVTMOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  case ISD::FTRUNC: {
+    unsigned FCVTZOpcs[2][2][2] = { // [FpCon][InTy][OutTy]
+        { { AArch64::FCVTZSUWSr, AArch64::FCVTZSUXSr },
+          { AArch64::FCVTZSUWDr, AArch64::FCVTZSUXDr } },
+        { { AArch64::FCVTZUUWSr, AArch64::FCVTZUUXSr },
+          { AArch64::FCVTZUUWDr, AArch64::FCVTZUUXDr } } };
+    Opc = FCVTZOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  case ISD::FROUND: {
+    unsigned FCVTAOpcs[2][2][2] = { // [FpCon][InTy][OutTy]
+        { { AArch64::FCVTASUWSr, AArch64::FCVTASUXSr },
+          { AArch64::FCVTASUWDr, AArch64::FCVTASUXDr } },
+        { { AArch64::FCVTAUUWSr, AArch64::FCVTAUUXSr },
+          { AArch64::FCVTAUUWDr, AArch64::FCVTAUUXDr } } };
+    Opc = FCVTAOpcs[FpConVariant][InTyVariant][OutTyVariant];
+    break;
+  }
+  }
+
+  SDLoc DL(N);
+  SDValue In = Op0->getOperand(0); // input of the rounding node
+  SmallVector<SDValue, 2> Ops;
+  Ops.push_back(In);
+
+  if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, InTyVariant, DL))
+    Ops.push_back(SDValue(FRINTXNode, 1)); // glue result of the FRINTX node
+
+  return CurDAG->getMachineNode(Opc, DL, OutTy, Ops);
+}
+
  bool
  AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                                unsigned RegWidth) {
@@ -2351,7 +2483,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
      int FI = cast<FrameIndexSDNode>(Node)->getIndex();
      unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
      const TargetLowering *TLI = getTargetLowering();
-    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+    SDValue TFI = CurDAG->getTargetFrameIndex(
+        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      SDLoc DL(Node);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
                        CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
@@ -2804,6 +2937,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
        break;
      }
      }
+    break;
    }
    case AArch64ISD::LD2post: {
      if (VT == MVT::v8i8)
@@ -3224,6 +3358,12 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
      if (SDNode *I = SelectLIBM(Node))
        return I;
      break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    if (SDNode *I = SelectFPConvertWithRound(Node))
+      return I;
+    break;
    }
  
    // Select the default instruction