lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

   1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the AArch64 target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "aarch64-isel"
  15 #include "AArch64.h"
  16 #include "AArch64InstrInfo.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "Utils/AArch64BaseInfo.h"
  20 #include "llvm/ADT/APSInt.h"
  21 #include "llvm/CodeGen/SelectionDAGISel.h"
  22 #include "llvm/IR/GlobalValue.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/raw_ostream.h"
  25
  26 using namespace llvm;
  27
  28 //===--------------------------------------------------------------------===//
  29 /// AArch64 specific code to select AArch64 machine instructions for
  30 /// SelectionDAG operations.
  31 ///
  32 namespace {
  33
  34 class AArch64DAGToDAGISel : public SelectionDAGISel {
  35   AArch64TargetMachine &TM;
  36
  37   /// Keep a pointer to the AArch64Subtarget around so that we can
  38   /// make the right decision when generating code for different targets.
  39   const AArch64Subtarget *Subtarget;
  40
  41 public:
  42   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
  43                                CodeGenOpt::Level OptLevel)
  44     : SelectionDAGISel(tm, OptLevel), TM(tm),
  45       Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  46   }
  47
  48   virtual const char *getPassName() const {
  49     return "AArch64 Instruction Selection";
  50   }
  51
  52   // Include the pieces autogenerated from the target description.
  53 #include "AArch64GenDAGISel.inc"
  54
  55   template<unsigned MemSize>
  56   bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
  57     const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  58     if (!CN || CN->getZExtValue() % MemSize != 0
  59         || CN->getZExtValue() / MemSize > 0xfff)
  60       return false;
  61
  62     UImm12 =  CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
  63     return true;
  64   }
  65
  66   template<unsigned RegWidth>
  67   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
  68     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  69   }
  70
  71   /// Used for pre-lowered address-reference nodes, so we already know
  72   /// the fields match. This operand's job is simply to add an
  73   /// appropriate shift operand to the MOVZ/MOVK instruction.
  74   template<unsigned LogShift>
  75   bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
  76     Imm = N;
  77     Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
  78     return true;
  79   }
  80
  81   bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
  82
  83   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
  84                                 unsigned RegWidth);
  85
  86   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
  87                                     char ConstraintCode,
  88                                     std::vector<SDValue> &OutOps);
  89
  90   bool SelectLogicalImm(SDValue N, SDValue &Imm);
  91
  92   template<unsigned RegWidth>
  93   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
  94     return SelectTSTBOperand(N, FixedPos, RegWidth);
  95   }
  96
  97   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
  98
  99   SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
 100                        unsigned Op64);
 101
 102   /// Put the given constant into a pool and return a DAG which will give its
 103   /// address.
 104   SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
 105
 106   SDNode *TrySelectToMoveImm(SDNode *N);
 107   SDNode *LowerToFPLitPool(SDNode *Node);
 108   SDNode *SelectToLitPool(SDNode *N);
 109
 110   SDNode* Select(SDNode*);
 111 private:
 112   /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4.
 113   SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
 114                     const uint16_t *Opcode);
 115
 116   /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4.
 117   SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
 118                     const uint16_t *Opcodes);
 119
 120   // Form pairs of consecutive 64-bit/128-bit registers.
 121   SDNode *createDPairNode(SDValue V0, SDValue V1);
 122   SDNode *createQPairNode(SDValue V0, SDValue V1);
 123
 124   // Form sequences of 3 consecutive 64-bit/128-bit registers.
 125   SDNode *createDTripleNode(SDValue V0, SDValue V1, SDValue V2);
 126   SDNode *createQTripleNode(SDValue V0, SDValue V1, SDValue V2);
 127
 128   // Form sequences of 4 consecutive 64-bit/128-bit registers.
 129   SDNode *createDQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 130   SDNode *createQQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 131 };
 132 }
 133
 134 bool
 135 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
 136                                               unsigned RegWidth) {
 137   const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
 138   if (!CN) return false;
 139
 140   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
 141   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
 142   // x-register.
 143   //
 144   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
 145   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
 146   // integers.
 147   bool IsExact;
 148
 149   // fbits is between 1 and 64 in the worst-case, which means the fmul
 150   // could have 2^64 as an actual operand. Need 65 bits of precision.
 151   APSInt IntVal(65, true);
 152   CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
 153
 154   // N.b. isPowerOf2 also checks for > 0.
 155   if (!IsExact || !IntVal.isPowerOf2()) return false;
 156   unsigned FBits = IntVal.logBase2();
 157
 158   // Checks above should have guaranteed that we haven't lost information in
 159   // finding FBits, but it must still be in range.
 160   if (FBits == 0 || FBits > RegWidth) return false;
 161
 162   FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
 163   return true;
 164 }
 165
 166 bool
 167 AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
 168                                                  char ConstraintCode,
 169                                                  std::vector<SDValue> &OutOps) {
 170   switch (ConstraintCode) {
 171   default: llvm_unreachable("Unrecognised AArch64 memory constraint");
 172   case 'm':
 173     // FIXME: more freedom is actually permitted for 'm'. We can go
 174     // hunting for a base and an offset if we want. Of course, since
 175     // we don't really know how the operand is going to be used we're
 176     // probably restricted to the load/store pair's simm7 as an offset
 177     // range anyway.
 178   case 'Q':
 179     OutOps.push_back(Op);
 180   }
 181
 182   return false;
 183 }
 184
 185 bool
 186 AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
 187   ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
 188   if (!Imm || !Imm->getValueAPF().isPosZero())
 189     return false;
 190
 191   // Doesn't actually carry any information, but keeps TableGen quiet.
 192   Dummy = CurDAG->getTargetConstant(0, MVT::i32);
 193   return true;
 194 }
 195
 196 bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
 197   uint32_t Bits;
 198   uint32_t RegWidth = N.getValueType().getSizeInBits();
 199
 200   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 201   if (!CN) return false;
 202
 203   if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
 204     return false;
 205
 206   Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
 207   return true;
 208 }
 209
 210 SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
 211   SDNode *ResNode;
 212   SDLoc dl(Node);
 213   EVT DestType = Node->getValueType(0);
 214   unsigned DestWidth = DestType.getSizeInBits();
 215
 216   unsigned MOVOpcode;
 217   EVT MOVType;
 218   int UImm16, Shift;
 219   uint32_t LogicalBits;
 220
 221   uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
 222   if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
 223     MOVType = DestType;
 224     MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
 225   } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
 226     MOVType = DestType;
 227     MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
 228   } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
 229     // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
 230     // use a 32-bit instruction: "movn w0, 0xedbc".
 231     MOVType = MVT::i32;
 232     MOVOpcode = AArch64::MOVNwii;
 233   } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits))  {
 234     MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
 235     uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
 236
 237     return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
 238                               CurDAG->getRegister(ZR, DestType),
 239                               CurDAG->getTargetConstant(LogicalBits, MVT::i32));
 240   } else {
 241     // Can't handle it in one instruction. There's scope for permitting two (or
 242     // more) instructions, but that'll need more thought.
 243     return NULL;
 244   }
 245
 246   ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
 247                                    CurDAG->getTargetConstant(UImm16, MVT::i32),
 248                                    CurDAG->getTargetConstant(Shift, MVT::i32));
 249
 250   if (MOVType != DestType) {
 251     ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
 252                           MVT::i64, MVT::i32, MVT::Other,
 253                           CurDAG->getTargetConstant(0, MVT::i64),
 254                           SDValue(ResNode, 0),
 255                           CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
 256   }
 257
 258   return ResNode;
 259 }
 260
 261 SDValue
 262 AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
 263                                                 const Constant *CV) {
 264   EVT PtrVT = getTargetLowering()->getPointerTy();
 265
 266   switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
 267   case CodeModel::Small: {
 268     unsigned Alignment =
 269       getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
 270     return CurDAG->getNode(
 271         AArch64ISD::WrapperSmall, DL, PtrVT,
 272         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
 273         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
 274         CurDAG->getConstant(Alignment, MVT::i32));
 275   }
 276   case CodeModel::Large: {
 277     SDNode *LitAddr;
 278     LitAddr = CurDAG->getMachineNode(
 279         AArch64::MOVZxii, DL, PtrVT,
 280         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
 281         CurDAG->getTargetConstant(3, MVT::i32));
 282     LitAddr = CurDAG->getMachineNode(
 283         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 284         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
 285         CurDAG->getTargetConstant(2, MVT::i32));
 286     LitAddr = CurDAG->getMachineNode(
 287         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 288         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
 289         CurDAG->getTargetConstant(1, MVT::i32));
 290     LitAddr = CurDAG->getMachineNode(
 291         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 292         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
 293         CurDAG->getTargetConstant(0, MVT::i32));
 294     return SDValue(LitAddr, 0);
 295   }
 296   default:
 297     llvm_unreachable("Only small and large code models supported now");
 298   }
 299 }
 300
 301 SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
 302   SDLoc DL(Node);
 303   uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
 304   int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
 305   EVT DestType = Node->getValueType(0);
 306
 307   // Since we may end up loading a 64-bit constant from a 32-bit entry the
 308   // constant in the pool may have a different type to the eventual node.
 309   ISD::LoadExtType Extension;
 310   EVT MemType;
 311
 312   assert((DestType == MVT::i64 || DestType == MVT::i32)
 313          && "Only expect integer constants at the moment");
 314
 315   if (DestType == MVT::i32) {
 316     Extension = ISD::NON_EXTLOAD;
 317     MemType = MVT::i32;
 318   } else if (UnsignedVal <= UINT32_MAX) {
 319     Extension = ISD::ZEXTLOAD;
 320     MemType = MVT::i32;
 321   } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
 322     Extension = ISD::SEXTLOAD;
 323     MemType = MVT::i32;
 324   } else {
 325     Extension = ISD::NON_EXTLOAD;
 326     MemType = MVT::i64;
 327   }
 328
 329   Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
 330                                                   MemType.getSizeInBits()),
 331                                   UnsignedVal);
 332   SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
 333   unsigned Alignment =
 334     getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
 335
 336   return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
 337                             PoolAddr,
 338                             MachinePointerInfo::getConstantPool(), MemType,
 339                             /* isVolatile = */ false,
 340                             /* isNonTemporal = */ false,
 341                             Alignment).getNode();
 342 }
 343
 344 SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
 345   SDLoc DL(Node);
 346   const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
 347   EVT DestType = Node->getValueType(0);
 348
 349   unsigned Alignment =
 350     getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
 351   SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
 352
 353   return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
 354                          MachinePointerInfo::getConstantPool(),
 355                          /* isVolatile = */ false,
 356                          /* isNonTemporal = */ false,
 357                          /* isInvariant = */ true,
 358                          Alignment).getNode();
 359 }
 360
 361 bool
 362 AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
 363                                        unsigned RegWidth) {
 364   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 365   if (!CN) return false;
 366
 367   uint64_t Val = CN->getZExtValue();
 368
 369   if (!isPowerOf2_64(Val)) return false;
 370
 371   unsigned TestedBit = Log2_64(Val);
 372   // Checks above should have guaranteed that we haven't lost information in
 373   // finding TestedBit, but it must still be in range.
 374   if (TestedBit >= RegWidth) return false;
 375
 376   FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
 377   return true;
 378 }
 379
 380 SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
 381                                           unsigned Op16,unsigned Op32,
 382                                           unsigned Op64) {
 383   // Mostly direct translation to the given operations, except that we preserve
 384   // the AtomicOrdering for use later on.
 385   AtomicSDNode *AN = cast<AtomicSDNode>(Node);
 386   EVT VT = AN->getMemoryVT();
 387
 388   unsigned Op;
 389   if (VT == MVT::i8)
 390     Op = Op8;
 391   else if (VT == MVT::i16)
 392     Op = Op16;
 393   else if (VT == MVT::i32)
 394     Op = Op32;
 395   else if (VT == MVT::i64)
 396     Op = Op64;
 397   else
 398     llvm_unreachable("Unexpected atomic operation");
 399
 400   SmallVector<SDValue, 4> Ops;
 401   for (unsigned i = 1; i < AN->getNumOperands(); ++i)
 402       Ops.push_back(AN->getOperand(i));
 403
 404   Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
 405   Ops.push_back(AN->getOperand(0)); // Chain moves to the end
 406
 407   return CurDAG->SelectNodeTo(Node, Op,
 408                               AN->getValueType(0), MVT::Other,
 409                               &Ops[0], Ops.size());
 410 }
 411
 412 SDNode *AArch64DAGToDAGISel::createDPairNode(SDValue V0, SDValue V1) {
 413   SDLoc dl(V0.getNode());
 414   SDValue RegClass =
 415       CurDAG->getTargetConstant(AArch64::DPairRegClassID, MVT::i32);
 416   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
 417   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
 418   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
 419   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v2i64,
 420                                 Ops);
 421 }
 422
 423 SDNode *AArch64DAGToDAGISel::createQPairNode(SDValue V0, SDValue V1) {
 424   SDLoc dl(V0.getNode());
 425   SDValue RegClass =
 426       CurDAG->getTargetConstant(AArch64::QPairRegClassID, MVT::i32);
 427   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
 428   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
 429   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
 430   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
 431                                 Ops);
 432 }
 433
 434 SDNode *AArch64DAGToDAGISel::createDTripleNode(SDValue V0, SDValue V1,
 435                                                SDValue V2) {
 436   SDLoc dl(V0.getNode());
 437   SDValue RegClass =
 438       CurDAG->getTargetConstant(AArch64::DTripleRegClassID, MVT::i32);
 439   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
 440   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
 441   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
 442   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
 443   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
 444                                 Ops);
 445 }
 446
 447 SDNode *AArch64DAGToDAGISel::createQTripleNode(SDValue V0, SDValue V1,
 448                                                SDValue V2) {
 449   SDLoc dl(V0.getNode());
 450   SDValue RegClass =
 451       CurDAG->getTargetConstant(AArch64::QTripleRegClassID, MVT::i32);
 452   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
 453   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
 454   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
 455   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
 456   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
 457                                 Ops);
 458 }
 459
 460 SDNode *AArch64DAGToDAGISel::createDQuadNode(SDValue V0, SDValue V1, SDValue V2,
 461                                              SDValue V3) {
 462   SDLoc dl(V0.getNode());
 463   SDValue RegClass =
 464       CurDAG->getTargetConstant(AArch64::DQuadRegClassID, MVT::i32);
 465   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
 466   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
 467   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
 468   SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::dsub_3, MVT::i32);
 469   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2,  V3,
 470                           SubReg3 };
 471   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
 472                                 Ops);
 473 }
 474
 475 SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2,
 476                                              SDValue V3) {
 477   SDLoc dl(V0.getNode());
 478   SDValue RegClass =
 479       CurDAG->getTargetConstant(AArch64::QQuadRegClassID, MVT::i32);
 480   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
 481   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
 482   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
 483   SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::qsub_3, MVT::i32);
 484   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2,  V3,
 485                           SubReg3 };
 486   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v8i64,
 487                                 Ops);
 488 }
 489
 490 // Get the register stride update opcode of a VLD/VST instruction that
 491 // is otherwise equivalent to the given fixed stride updating instruction.
 492 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
 493   switch (Opc) {
 494   default: break;
 495   case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
 496   case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
 497   case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
 498   case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
 499   case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
 500   case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
 501   case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
 502   case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
 503
 504   case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
 505   case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
 506   case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
 507   case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
 508   case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
 509   case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
 510   case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
 511   case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
 512
 513   case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
 514   case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
 515   case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
 516   case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
 517   case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
 518   case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
 519   case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
 520   case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
 521
 522   case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
 523   case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
 524   case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
 525   case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
 526   case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
 527   case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
 528   case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
 529   case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
 530
 531   case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
 532   case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
 533   case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
 534   case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
 535   case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
 536   case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
 537   case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
 538   case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
 539
 540   case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
 541   case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
 542   case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
 543   case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
 544   case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
 545   case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
 546   case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
 547   case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
 548
 549   case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
 550   case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
 551   case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
 552   case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
 553   case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
 554   case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
 555   case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
 556   case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
 557
 558   case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
 559   case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
 560   case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
 561   case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
 562   case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
 563   case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
 564   case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
 565   case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
 566   }
 567   return Opc; // If not one we handle, return it unchanged.
 568 }
 569
 570 SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
 571                                        bool isUpdating,
 572                                        const uint16_t *Opcodes) {
 573   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
 574
 575   EVT VT = N->getValueType(0);
 576   unsigned OpcodeIndex;
 577   switch (VT.getSimpleVT().SimpleTy) {
 578   default: llvm_unreachable("unhandled vector load type");
 579   case MVT::v8i8:  OpcodeIndex = 0; break;
 580   case MVT::v4i16: OpcodeIndex = 1; break;
 581   case MVT::v2f32:
 582   case MVT::v2i32: OpcodeIndex = 2; break;
 583   case MVT::v1f64:
 584   case MVT::v1i64: OpcodeIndex = 3; break;
 585   case MVT::v16i8: OpcodeIndex = 4; break;
 586   case MVT::v8f16:
 587   case MVT::v8i16: OpcodeIndex = 5; break;
 588   case MVT::v4f32:
 589   case MVT::v4i32: OpcodeIndex = 6; break;
 590   case MVT::v2f64:
 591   case MVT::v2i64: OpcodeIndex = 7; break;
 592   }
 593   unsigned Opc = Opcodes[OpcodeIndex];
 594
 595   SmallVector<SDValue, 2> Ops;
 596   unsigned AddrOpIdx = isUpdating ? 1 : 2;
 597   Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
 598
 599   if (isUpdating) {
 600     SDValue Inc = N->getOperand(AddrOpIdx + 1);
 601     if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
 602       Opc = getVLDSTRegisterUpdateOpcode(Opc);
 603     Ops.push_back(Inc);
 604   }
 605
 606   Ops.push_back(N->getOperand(0)); // Push back the Chain
 607
 608   std::vector<EVT> ResTys;
 609   bool is64BitVector = VT.is64BitVector();
 610
 611   if (NumVecs == 1)
 612     ResTys.push_back(VT);
 613   else if (NumVecs == 3)
 614     ResTys.push_back(MVT::Untyped);
 615   else {
 616     EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
 617                                  is64BitVector ? NumVecs : NumVecs * 2);
 618     ResTys.push_back(ResTy);
 619   }
 620
 621   if (isUpdating)
 622     ResTys.push_back(MVT::i64); // Type of the updated register
 623   ResTys.push_back(MVT::Other); // Type of the Chain
 624   SDLoc dl(N);
 625   SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 626
 627   // Transfer memoperands.
 628   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
 629   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 630   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
 631
 632   if (NumVecs == 1)
 633     return VLd;
 634
 635   // If NumVecs > 1, the return result is a super register containing 2-4
 636   // consecutive vector registers.
 637   SDValue SuperReg = SDValue(VLd, 0);
 638
 639   unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
 640   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
 641     ReplaceUses(SDValue(N, Vec),
 642                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
 643   // Update users of the Chain
 644   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
 645   if (isUpdating)
 646     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
 647
 648   return NULL;
 649 }
 650
 651 SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
 652                                        bool isUpdating,
 653                                        const uint16_t *Opcodes) {
 654   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
 655   SDLoc dl(N);
 656
 657   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
 658   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 659
 660   unsigned AddrOpIdx = isUpdating ? 1 : 2;
 661   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
 662   EVT VT = N->getOperand(Vec0Idx).getValueType();
 663   unsigned OpcodeIndex;
 664   switch (VT.getSimpleVT().SimpleTy) {
 665   default: llvm_unreachable("unhandled vector store type");
 666   case MVT::v8i8:  OpcodeIndex = 0; break;
 667   case MVT::v4i16: OpcodeIndex = 1; break;
 668   case MVT::v2f32:
 669   case MVT::v2i32: OpcodeIndex = 2; break;
 670   case MVT::v1f64:
 671   case MVT::v1i64: OpcodeIndex = 3; break;
 672   case MVT::v16i8: OpcodeIndex = 4; break;
 673   case MVT::v8f16:
 674   case MVT::v8i16: OpcodeIndex = 5; break;
 675   case MVT::v4f32:
 676   case MVT::v4i32: OpcodeIndex = 6; break;
 677   case MVT::v2f64:
 678   case MVT::v2i64: OpcodeIndex = 7; break;
 679   }
 680   unsigned Opc = Opcodes[OpcodeIndex];
 681
 682   std::vector<EVT> ResTys;
 683   if (isUpdating)
 684     ResTys.push_back(MVT::i64);
 685   ResTys.push_back(MVT::Other); // Type for the Chain
 686
 687   SmallVector<SDValue, 6> Ops;
 688   Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
 689
 690   if (isUpdating) {
 691     SDValue Inc = N->getOperand(AddrOpIdx + 1);
 692     if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
 693       Opc = getVLDSTRegisterUpdateOpcode(Opc);
 694     Ops.push_back(Inc);
 695   }
 696   bool is64BitVector = VT.is64BitVector();
 697
 698   SDValue V0 = N->getOperand(Vec0Idx + 0);
 699   SDValue SrcReg;
 700   if (NumVecs == 1)
 701     SrcReg = V0;
 702   else {
 703     SDValue V1 = N->getOperand(Vec0Idx + 1);
 704     if (NumVecs == 2)
 705       SrcReg = is64BitVector ? SDValue(createDPairNode(V0, V1), 0)
 706                              : SDValue(createQPairNode(V0, V1), 0);
 707     else {
 708       SDValue V2 = N->getOperand(Vec0Idx + 2);
 709       if (NumVecs == 3)
 710         SrcReg = is64BitVector ? SDValue(createDTripleNode(V0, V1, V2), 0)
 711                                : SDValue(createQTripleNode(V0, V1, V2), 0);
 712       else {
 713         SDValue V3 = N->getOperand(Vec0Idx + 3);
 714         SrcReg = is64BitVector ? SDValue(createDQuadNode(V0, V1, V2, V3), 0)
 715                                : SDValue(createQQuadNode(V0, V1, V2, V3), 0);
 716       }
 717     }
 718   }
 719   Ops.push_back(SrcReg);
 720
 721   // Push back the Chain
 722   Ops.push_back(N->getOperand(0));
 723
 724   // Transfer memoperands.
 725   SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 726   cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
 727
 728   return VSt;
 729 }
 730
 731 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
 732   // Dump information about the Node being selected
 733   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
 734
 735   if (Node->isMachineOpcode()) {
 736     DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
 737     Node->setNodeId(-1);
 738     return NULL;
 739   }
 740
 741   switch (Node->getOpcode()) {
 742   case ISD::ATOMIC_LOAD_ADD:
 743     return SelectAtomic(Node,
 744                         AArch64::ATOMIC_LOAD_ADD_I8,
 745                         AArch64::ATOMIC_LOAD_ADD_I16,
 746                         AArch64::ATOMIC_LOAD_ADD_I32,
 747                         AArch64::ATOMIC_LOAD_ADD_I64);
 748   case ISD::ATOMIC_LOAD_SUB:
 749     return SelectAtomic(Node,
 750                         AArch64::ATOMIC_LOAD_SUB_I8,
 751                         AArch64::ATOMIC_LOAD_SUB_I16,
 752                         AArch64::ATOMIC_LOAD_SUB_I32,
 753                         AArch64::ATOMIC_LOAD_SUB_I64);
 754   case ISD::ATOMIC_LOAD_AND:
 755     return SelectAtomic(Node,
 756                         AArch64::ATOMIC_LOAD_AND_I8,
 757                         AArch64::ATOMIC_LOAD_AND_I16,
 758                         AArch64::ATOMIC_LOAD_AND_I32,
 759                         AArch64::ATOMIC_LOAD_AND_I64);
 760   case ISD::ATOMIC_LOAD_OR:
 761     return SelectAtomic(Node,
 762                         AArch64::ATOMIC_LOAD_OR_I8,
 763                         AArch64::ATOMIC_LOAD_OR_I16,
 764                         AArch64::ATOMIC_LOAD_OR_I32,
 765                         AArch64::ATOMIC_LOAD_OR_I64);
 766   case ISD::ATOMIC_LOAD_XOR:
 767     return SelectAtomic(Node,
 768                         AArch64::ATOMIC_LOAD_XOR_I8,
 769                         AArch64::ATOMIC_LOAD_XOR_I16,
 770                         AArch64::ATOMIC_LOAD_XOR_I32,
 771                         AArch64::ATOMIC_LOAD_XOR_I64);
 772   case ISD::ATOMIC_LOAD_NAND:
 773     return SelectAtomic(Node,
 774                         AArch64::ATOMIC_LOAD_NAND_I8,
 775                         AArch64::ATOMIC_LOAD_NAND_I16,
 776                         AArch64::ATOMIC_LOAD_NAND_I32,
 777                         AArch64::ATOMIC_LOAD_NAND_I64);
 778   case ISD::ATOMIC_LOAD_MIN:
 779     return SelectAtomic(Node,
 780                         AArch64::ATOMIC_LOAD_MIN_I8,
 781                         AArch64::ATOMIC_LOAD_MIN_I16,
 782                         AArch64::ATOMIC_LOAD_MIN_I32,
 783                         AArch64::ATOMIC_LOAD_MIN_I64);
 784   case ISD::ATOMIC_LOAD_MAX:
 785     return SelectAtomic(Node,
 786                         AArch64::ATOMIC_LOAD_MAX_I8,
 787                         AArch64::ATOMIC_LOAD_MAX_I16,
 788                         AArch64::ATOMIC_LOAD_MAX_I32,
 789                         AArch64::ATOMIC_LOAD_MAX_I64);
 790   case ISD::ATOMIC_LOAD_UMIN:
 791     return SelectAtomic(Node,
 792                         AArch64::ATOMIC_LOAD_UMIN_I8,
 793                         AArch64::ATOMIC_LOAD_UMIN_I16,
 794                         AArch64::ATOMIC_LOAD_UMIN_I32,
 795                         AArch64::ATOMIC_LOAD_UMIN_I64);
 796   case ISD::ATOMIC_LOAD_UMAX:
 797     return SelectAtomic(Node,
 798                         AArch64::ATOMIC_LOAD_UMAX_I8,
 799                         AArch64::ATOMIC_LOAD_UMAX_I16,
 800                         AArch64::ATOMIC_LOAD_UMAX_I32,
 801                         AArch64::ATOMIC_LOAD_UMAX_I64);
 802   case ISD::ATOMIC_SWAP:
 803     return SelectAtomic(Node,
 804                         AArch64::ATOMIC_SWAP_I8,
 805                         AArch64::ATOMIC_SWAP_I16,
 806                         AArch64::ATOMIC_SWAP_I32,
 807                         AArch64::ATOMIC_SWAP_I64);
 808   case ISD::ATOMIC_CMP_SWAP:
 809     return SelectAtomic(Node,
 810                         AArch64::ATOMIC_CMP_SWAP_I8,
 811                         AArch64::ATOMIC_CMP_SWAP_I16,
 812                         AArch64::ATOMIC_CMP_SWAP_I32,
 813                         AArch64::ATOMIC_CMP_SWAP_I64);
 814   case ISD::FrameIndex: {
 815     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
 816     EVT PtrTy = getTargetLowering()->getPointerTy();
 817     SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
 818     return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
 819                                 TFI, CurDAG->getTargetConstant(0, PtrTy));
 820   }
 821   case ISD::ConstantPool: {
 822     // Constant pools are fine, just create a Target entry.
 823     ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
 824     const Constant *C = CN->getConstVal();
 825     SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
 826
 827     ReplaceUses(SDValue(Node, 0), CP);
 828     return NULL;
 829   }
 830   case ISD::Constant: {
 831     SDNode *ResNode = 0;
 832     if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
 833       // XZR and WZR are probably even better than an actual move: most of the
 834       // time they can be folded into another instruction with *no* cost.
 835
 836       EVT Ty = Node->getValueType(0);
 837       assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
 838       uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
 839       ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 840                                        SDLoc(Node),
 841                                        Register, Ty).getNode();
 842     }
 843
 844     // Next best option is a move-immediate, see if we can do that.
 845     if (!ResNode) {
 846       ResNode = TrySelectToMoveImm(Node);
 847     }
 848
 849     if (ResNode)
 850       return ResNode;
 851
 852     // If even that fails we fall back to a lit-pool entry at the moment. Future
 853     // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
 854     ResNode = SelectToLitPool(Node);
 855     assert(ResNode && "We need *some* way to materialise a constant");
 856
 857     // We want to continue selection at this point since the litpool access
 858     // generated used generic nodes for simplicity.
 859     ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
 860     Node = ResNode;
 861     break;
 862   }
 863   case ISD::ConstantFP: {
 864     if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
 865       // FMOV will take care of it from TableGen
 866       break;
 867     }
 868
 869     SDNode *ResNode = LowerToFPLitPool(Node);
 870     ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
 871
 872     // We want to continue selection at this point since the litpool access
 873     // generated used generic nodes for simplicity.
 874     Node = ResNode;
 875     break;
 876   }
 877   case AArch64ISD::NEON_LD1_UPD: {
 878     static const uint16_t Opcodes[] = {
 879       AArch64::LD1WB_8B_fixed,  AArch64::LD1WB_4H_fixed,
 880       AArch64::LD1WB_2S_fixed,  AArch64::LD1WB_1D_fixed,
 881       AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
 882       AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
 883     };
 884     return SelectVLD(Node, 1, true, Opcodes);
 885   }
 886   case AArch64ISD::NEON_LD2_UPD: {
 887     static const uint16_t Opcodes[] = {
 888       AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
 889       AArch64::LD2WB_2S_fixed,  AArch64::LD1WB2V_1D_fixed,
 890       AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
 891       AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
 892     };
 893     return SelectVLD(Node, 2, true, Opcodes);
 894   }
 895   case AArch64ISD::NEON_LD3_UPD: {
 896     static const uint16_t Opcodes[] = {
 897       AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
 898       AArch64::LD3WB_2S_fixed,  AArch64::LD1WB3V_1D_fixed,
 899       AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
 900       AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
 901     };
 902     return SelectVLD(Node, 3, true, Opcodes);
 903   }
 904   case AArch64ISD::NEON_LD4_UPD: {
 905     static const uint16_t Opcodes[] = {
 906       AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
 907       AArch64::LD4WB_2S_fixed,  AArch64::LD1WB4V_1D_fixed,
 908       AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
 909       AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
 910     };
 911     return SelectVLD(Node, 4, true, Opcodes);
 912   }
 913   case AArch64ISD::NEON_ST1_UPD: {
 914     static const uint16_t Opcodes[] = {
 915       AArch64::ST1WB_8B_fixed,  AArch64::ST1WB_4H_fixed,
 916       AArch64::ST1WB_2S_fixed,  AArch64::ST1WB_1D_fixed,
 917       AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
 918       AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
 919     };
 920     return SelectVST(Node, 1, true, Opcodes);
 921   }
 922   case AArch64ISD::NEON_ST2_UPD: {
 923     static const uint16_t Opcodes[] = {
 924       AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
 925       AArch64::ST2WB_2S_fixed,  AArch64::ST1WB2V_1D_fixed,
 926       AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
 927       AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
 928     };
 929     return SelectVST(Node, 2, true, Opcodes);
 930   }
 931   case AArch64ISD::NEON_ST3_UPD: {
 932     static const uint16_t Opcodes[] = {
 933       AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
 934       AArch64::ST3WB_2S_fixed,  AArch64::ST1WB3V_1D_fixed,
 935       AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
 936       AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
 937     };
 938     return SelectVST(Node, 3, true, Opcodes);
 939   }
 940   case AArch64ISD::NEON_ST4_UPD: {
 941     static const uint16_t Opcodes[] = {
 942       AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
 943       AArch64::ST4WB_2S_fixed,  AArch64::ST1WB4V_1D_fixed,
 944       AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
 945       AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
 946     };
 947     return SelectVST(Node, 4, true, Opcodes);
 948   }
 949   case ISD::INTRINSIC_VOID:
 950   case ISD::INTRINSIC_W_CHAIN: {
 951     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
 952     switch (IntNo) {
 953     default:
 954       break;
 955
 956     case Intrinsic::arm_neon_vld1: {
 957       static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H,
 958                                           AArch64::LD1_2S,  AArch64::LD1_1D,
 959                                           AArch64::LD1_16B, AArch64::LD1_8H,
 960                                           AArch64::LD1_4S,  AArch64::LD1_2D };
 961       return SelectVLD(Node, 1, false, Opcodes);
 962     }
 963     case Intrinsic::arm_neon_vld2: {
 964       static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H,
 965                                           AArch64::LD2_2S,  AArch64::LD1_2V_1D,
 966                                           AArch64::LD2_16B, AArch64::LD2_8H,
 967                                           AArch64::LD2_4S,  AArch64::LD2_2D };
 968       return SelectVLD(Node, 2, false, Opcodes);
 969     }
 970     case Intrinsic::arm_neon_vld3: {
 971       static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H,
 972                                           AArch64::LD3_2S,  AArch64::LD1_3V_1D,
 973                                           AArch64::LD3_16B, AArch64::LD3_8H,
 974                                           AArch64::LD3_4S,  AArch64::LD3_2D };
 975       return SelectVLD(Node, 3, false, Opcodes);
 976     }
 977     case Intrinsic::arm_neon_vld4: {
 978       static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H,
 979                                           AArch64::LD4_2S,  AArch64::LD1_4V_1D,
 980                                           AArch64::LD4_16B, AArch64::LD4_8H,
 981                                           AArch64::LD4_4S,  AArch64::LD4_2D };
 982       return SelectVLD(Node, 4, false, Opcodes);
 983     }
 984     case Intrinsic::arm_neon_vst1: {
 985       static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H,
 986                                           AArch64::ST1_2S,  AArch64::ST1_1D,
 987                                           AArch64::ST1_16B, AArch64::ST1_8H,
 988                                           AArch64::ST1_4S,  AArch64::ST1_2D };
 989       return SelectVST(Node, 1, false, Opcodes);
 990     }
 991     case Intrinsic::arm_neon_vst2: {
 992       static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H,
 993                                           AArch64::ST2_2S,  AArch64::ST1_2V_1D,
 994                                           AArch64::ST2_16B, AArch64::ST2_8H,
 995                                           AArch64::ST2_4S,  AArch64::ST2_2D };
 996       return SelectVST(Node, 2, false, Opcodes);
 997     }
 998     case Intrinsic::arm_neon_vst3: {
 999       static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H,
1000                                           AArch64::ST3_2S,  AArch64::ST1_3V_1D,
1001                                           AArch64::ST3_16B, AArch64::ST3_8H,
1002                                           AArch64::ST3_4S,  AArch64::ST3_2D };
1003       return SelectVST(Node, 3, false, Opcodes);
1004     }
1005     case Intrinsic::arm_neon_vst4: {
1006       static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H,
1007                                           AArch64::ST4_2S,  AArch64::ST1_4V_1D,
1008                                           AArch64::ST4_16B, AArch64::ST4_8H,
1009                                           AArch64::ST4_4S,  AArch64::ST4_2D };
1010       return SelectVST(Node, 4, false, Opcodes);
1011     }
1012     }
1013     break;
1014   }
1015   default:
1016     break; // Let generic code handle it
1017   }
1018
1019   SDNode *ResNode = SelectCode(Node);
1020
1021   DEBUG(dbgs() << "=> ";
1022         if (ResNode == NULL || ResNode == Node)
1023           Node->dump(CurDAG);
1024         else
1025           ResNode->dump(CurDAG);
1026         dbgs() << "\n");
1027
1028   return ResNode;
1029 }
1030
1031 /// This pass converts a legalized DAG into a AArch64-specific DAG, ready for
1032 /// instruction scheduling.
1033 FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
1034                                          CodeGenOpt::Level OptLevel) {
1035   return new AArch64DAGToDAGISel(TM, OptLevel);
1036 }