lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

   1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the AArch64 target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "aarch64-isel"
  15 #include "AArch64.h"
  16 #include "AArch64InstrInfo.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "Utils/AArch64BaseInfo.h"
  20 #include "llvm/ADT/APSInt.h"
  21 #include "llvm/CodeGen/SelectionDAGISel.h"
  22 #include "llvm/IR/GlobalValue.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/raw_ostream.h"
  25
  26 using namespace llvm;
  27
  28 //===--------------------------------------------------------------------===//
  29 /// AArch64 specific code to select AArch64 machine instructions for
  30 /// SelectionDAG operations.
  31 ///
  32 namespace {
  33
  34 class AArch64DAGToDAGISel : public SelectionDAGISel {
  35   AArch64TargetMachine &TM;
  36
  37   /// Keep a pointer to the AArch64Subtarget around so that we can
  38   /// make the right decision when generating code for different targets.
  39   const AArch64Subtarget *Subtarget;
  40
  41 public:
  42   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
  43                                CodeGenOpt::Level OptLevel)
  44     : SelectionDAGISel(tm, OptLevel), TM(tm),
  45       Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  46   }
  47
  48   virtual const char *getPassName() const {
  49     return "AArch64 Instruction Selection";
  50   }
  51
  52   // Include the pieces autogenerated from the target description.
  53 #include "AArch64GenDAGISel.inc"
  54
  55   template<unsigned MemSize>
  56   bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
  57     const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  58     if (!CN || CN->getZExtValue() % MemSize != 0
  59         || CN->getZExtValue() / MemSize > 0xfff)
  60       return false;
  61
  62     UImm12 =  CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
  63     return true;
  64   }
  65
  66   template<unsigned RegWidth>
  67   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
  68     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  69   }
  70
  71   /// Used for pre-lowered address-reference nodes, so we already know
  72   /// the fields match. This operand's job is simply to add an
  73   /// appropriate shift operand to the MOVZ/MOVK instruction.
  74   template<unsigned LogShift>
  75   bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
  76     Imm = N;
  77     Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
  78     return true;
  79   }
  80
  81   bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
  82
  83   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
  84                                 unsigned RegWidth);
  85
  86   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
  87                                     char ConstraintCode,
  88                                     std::vector<SDValue> &OutOps);
  89
  90   bool SelectLogicalImm(SDValue N, SDValue &Imm);
  91
  92   template<unsigned RegWidth>
  93   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
  94     return SelectTSTBOperand(N, FixedPos, RegWidth);
  95   }
  96
  97   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
  98
  99   SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
 100                        unsigned Op64);
 101
 102   /// Put the given constant into a pool and return a DAG which will give its
 103   /// address.
 104   SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
 105
 106   SDNode *TrySelectToMoveImm(SDNode *N);
 107   SDNode *LowerToFPLitPool(SDNode *Node);
 108   SDNode *SelectToLitPool(SDNode *N);
 109
 110   SDNode* Select(SDNode*);
 111 private:
 112   /// Get the opcode for table lookup instruction
 113   unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
 114
 115   /// Select NEON table lookup intrinsics.  NumVecs should be 1, 2, 3 or 4.
 116   /// IsExt is to indicate if the result will be extended with an argument.
 117   SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
 118
 119   /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4.
 120   SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
 121                     const uint16_t *Opcode);
 122
 123   /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4.
 124   SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
 125                     const uint16_t *Opcodes);
 126
 127   /// Form sequences of consecutive 64/128-bit registers for use in NEON
 128   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
 129   /// between 1 and 4 elements. If it contains a single element that is returned
 130   /// unchanged; otherwise a REG_SEQUENCE value is returned.
 131   SDValue createDTuple(ArrayRef<SDValue> Vecs);
 132   SDValue createQTuple(ArrayRef<SDValue> Vecs);
 133
 134   /// Generic helper for the createDTuple/createQTuple
 135   /// functions. Those should almost always be called instead.
 136   SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
 137                       unsigned SubRegs[]);
 138 };
 139 }
 140
 141 bool
 142 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
 143                                               unsigned RegWidth) {
 144   const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
 145   if (!CN) return false;
 146
 147   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
 148   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
 149   // x-register.
 150   //
 151   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
 152   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
 153   // integers.
 154   bool IsExact;
 155
 156   // fbits is between 1 and 64 in the worst-case, which means the fmul
 157   // could have 2^64 as an actual operand. Need 65 bits of precision.
 158   APSInt IntVal(65, true);
 159   CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
 160
 161   // N.b. isPowerOf2 also checks for > 0.
 162   if (!IsExact || !IntVal.isPowerOf2()) return false;
 163   unsigned FBits = IntVal.logBase2();
 164
 165   // Checks above should have guaranteed that we haven't lost information in
 166   // finding FBits, but it must still be in range.
 167   if (FBits == 0 || FBits > RegWidth) return false;
 168
 169   FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
 170   return true;
 171 }
 172
 173 bool
 174 AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
 175                                                  char ConstraintCode,
 176                                                  std::vector<SDValue> &OutOps) {
 177   switch (ConstraintCode) {
 178   default: llvm_unreachable("Unrecognised AArch64 memory constraint");
 179   case 'm':
 180     // FIXME: more freedom is actually permitted for 'm'. We can go
 181     // hunting for a base and an offset if we want. Of course, since
 182     // we don't really know how the operand is going to be used we're
 183     // probably restricted to the load/store pair's simm7 as an offset
 184     // range anyway.
 185   case 'Q':
 186     OutOps.push_back(Op);
 187   }
 188
 189   return false;
 190 }
 191
 192 bool
 193 AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
 194   ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
 195   if (!Imm || !Imm->getValueAPF().isPosZero())
 196     return false;
 197
 198   // Doesn't actually carry any information, but keeps TableGen quiet.
 199   Dummy = CurDAG->getTargetConstant(0, MVT::i32);
 200   return true;
 201 }
 202
 203 bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
 204   uint32_t Bits;
 205   uint32_t RegWidth = N.getValueType().getSizeInBits();
 206
 207   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 208   if (!CN) return false;
 209
 210   if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
 211     return false;
 212
 213   Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
 214   return true;
 215 }
 216
 217 SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
 218   SDNode *ResNode;
 219   SDLoc dl(Node);
 220   EVT DestType = Node->getValueType(0);
 221   unsigned DestWidth = DestType.getSizeInBits();
 222
 223   unsigned MOVOpcode;
 224   EVT MOVType;
 225   int UImm16, Shift;
 226   uint32_t LogicalBits;
 227
 228   uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
 229   if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
 230     MOVType = DestType;
 231     MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
 232   } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
 233     MOVType = DestType;
 234     MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
 235   } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
 236     // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
 237     // use a 32-bit instruction: "movn w0, 0xedbc".
 238     MOVType = MVT::i32;
 239     MOVOpcode = AArch64::MOVNwii;
 240   } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits))  {
 241     MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
 242     uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
 243
 244     return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
 245                               CurDAG->getRegister(ZR, DestType),
 246                               CurDAG->getTargetConstant(LogicalBits, MVT::i32));
 247   } else {
 248     // Can't handle it in one instruction. There's scope for permitting two (or
 249     // more) instructions, but that'll need more thought.
 250     return NULL;
 251   }
 252
 253   ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
 254                                    CurDAG->getTargetConstant(UImm16, MVT::i32),
 255                                    CurDAG->getTargetConstant(Shift, MVT::i32));
 256
 257   if (MOVType != DestType) {
 258     ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
 259                           MVT::i64, MVT::i32, MVT::Other,
 260                           CurDAG->getTargetConstant(0, MVT::i64),
 261                           SDValue(ResNode, 0),
 262                           CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
 263   }
 264
 265   return ResNode;
 266 }
 267
 268 SDValue
 269 AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
 270                                                 const Constant *CV) {
 271   EVT PtrVT = getTargetLowering()->getPointerTy();
 272
 273   switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
 274   case CodeModel::Small: {
 275     unsigned Alignment =
 276       getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
 277     return CurDAG->getNode(
 278         AArch64ISD::WrapperSmall, DL, PtrVT,
 279         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
 280         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
 281         CurDAG->getConstant(Alignment, MVT::i32));
 282   }
 283   case CodeModel::Large: {
 284     SDNode *LitAddr;
 285     LitAddr = CurDAG->getMachineNode(
 286         AArch64::MOVZxii, DL, PtrVT,
 287         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
 288         CurDAG->getTargetConstant(3, MVT::i32));
 289     LitAddr = CurDAG->getMachineNode(
 290         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 291         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
 292         CurDAG->getTargetConstant(2, MVT::i32));
 293     LitAddr = CurDAG->getMachineNode(
 294         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 295         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
 296         CurDAG->getTargetConstant(1, MVT::i32));
 297     LitAddr = CurDAG->getMachineNode(
 298         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 299         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
 300         CurDAG->getTargetConstant(0, MVT::i32));
 301     return SDValue(LitAddr, 0);
 302   }
 303   default:
 304     llvm_unreachable("Only small and large code models supported now");
 305   }
 306 }
 307
 308 SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
 309   SDLoc DL(Node);
 310   uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
 311   int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
 312   EVT DestType = Node->getValueType(0);
 313
 314   // Since we may end up loading a 64-bit constant from a 32-bit entry the
 315   // constant in the pool may have a different type to the eventual node.
 316   ISD::LoadExtType Extension;
 317   EVT MemType;
 318
 319   assert((DestType == MVT::i64 || DestType == MVT::i32)
 320          && "Only expect integer constants at the moment");
 321
 322   if (DestType == MVT::i32) {
 323     Extension = ISD::NON_EXTLOAD;
 324     MemType = MVT::i32;
 325   } else if (UnsignedVal <= UINT32_MAX) {
 326     Extension = ISD::ZEXTLOAD;
 327     MemType = MVT::i32;
 328   } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
 329     Extension = ISD::SEXTLOAD;
 330     MemType = MVT::i32;
 331   } else {
 332     Extension = ISD::NON_EXTLOAD;
 333     MemType = MVT::i64;
 334   }
 335
 336   Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
 337                                                   MemType.getSizeInBits()),
 338                                   UnsignedVal);
 339   SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
 340   unsigned Alignment =
 341     getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
 342
 343   return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
 344                             PoolAddr,
 345                             MachinePointerInfo::getConstantPool(), MemType,
 346                             /* isVolatile = */ false,
 347                             /* isNonTemporal = */ false,
 348                             Alignment).getNode();
 349 }
 350
 351 SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
 352   SDLoc DL(Node);
 353   const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
 354   EVT DestType = Node->getValueType(0);
 355
 356   unsigned Alignment =
 357     getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
 358   SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
 359
 360   return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
 361                          MachinePointerInfo::getConstantPool(),
 362                          /* isVolatile = */ false,
 363                          /* isNonTemporal = */ false,
 364                          /* isInvariant = */ true,
 365                          Alignment).getNode();
 366 }
 367
 368 bool
 369 AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
 370                                        unsigned RegWidth) {
 371   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 372   if (!CN) return false;
 373
 374   uint64_t Val = CN->getZExtValue();
 375
 376   if (!isPowerOf2_64(Val)) return false;
 377
 378   unsigned TestedBit = Log2_64(Val);
 379   // Checks above should have guaranteed that we haven't lost information in
 380   // finding TestedBit, but it must still be in range.
 381   if (TestedBit >= RegWidth) return false;
 382
 383   FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
 384   return true;
 385 }
 386
 387 SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
 388                                           unsigned Op16,unsigned Op32,
 389                                           unsigned Op64) {
 390   // Mostly direct translation to the given operations, except that we preserve
 391   // the AtomicOrdering for use later on.
 392   AtomicSDNode *AN = cast<AtomicSDNode>(Node);
 393   EVT VT = AN->getMemoryVT();
 394
 395   unsigned Op;
 396   if (VT == MVT::i8)
 397     Op = Op8;
 398   else if (VT == MVT::i16)
 399     Op = Op16;
 400   else if (VT == MVT::i32)
 401     Op = Op32;
 402   else if (VT == MVT::i64)
 403     Op = Op64;
 404   else
 405     llvm_unreachable("Unexpected atomic operation");
 406
 407   SmallVector<SDValue, 4> Ops;
 408   for (unsigned i = 1; i < AN->getNumOperands(); ++i)
 409       Ops.push_back(AN->getOperand(i));
 410
 411   Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
 412   Ops.push_back(AN->getOperand(0)); // Chain moves to the end
 413
 414   return CurDAG->SelectNodeTo(Node, Op,
 415                               AN->getValueType(0), MVT::Other,
 416                               &Ops[0], Ops.size());
 417 }
 418
 419 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
 420   static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
 421                                     AArch64::DTripleRegClassID,
 422                                     AArch64::DQuadRegClassID };
 423   static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
 424                                 AArch64::dsub_2, AArch64::dsub_3 };
 425
 426   return createTuple(Regs, RegClassIDs, SubRegs);
 427 }
 428
 429 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
 430   static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
 431                                     AArch64::QTripleRegClassID,
 432                                     AArch64::QQuadRegClassID };
 433   static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
 434                                 AArch64::qsub_2, AArch64::qsub_3 };
 435
 436   return createTuple(Regs, RegClassIDs, SubRegs);
 437 }
 438
 439 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
 440                                          unsigned RegClassIDs[],
 441                                          unsigned SubRegs[]) {
 442   // There's no special register-class for a vector-list of 1 element: it's just
 443   // a vector.
 444   if (Regs.size() == 1)
 445     return Regs[0];
 446
 447   assert(Regs.size() >= 2 && Regs.size() <= 4);
 448
 449   SDLoc DL(Regs[0].getNode());
 450
 451   SmallVector<SDValue, 4> Ops;
 452
 453   // First operand of REG_SEQUENCE is the desired RegClass.
 454   Ops.push_back(
 455       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
 456
 457   // Then we get pairs of source & subregister-position for the components.
 458   for (unsigned i = 0; i < Regs.size(); ++i) {
 459     Ops.push_back(Regs[i]);
 460     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
 461   }
 462
 463   SDNode *N =
 464       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
 465   return SDValue(N, 0);
 466 }
 467
 468
 469 // Get the register stride update opcode of a VLD/VST instruction that
 470 // is otherwise equivalent to the given fixed stride updating instruction.
 471 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
 472   switch (Opc) {
 473   default: break;
 474   case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
 475   case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
 476   case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
 477   case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
 478   case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
 479   case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
 480   case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
 481   case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
 482
 483   case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
 484   case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
 485   case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
 486   case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
 487   case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
 488   case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
 489   case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
 490   case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
 491
 492   case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
 493   case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
 494   case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
 495   case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
 496   case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
 497   case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
 498   case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
 499   case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
 500
 501   case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
 502   case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
 503   case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
 504   case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
 505   case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
 506   case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
 507   case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
 508   case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
 509
 510   case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
 511   case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
 512   case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
 513   case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
 514   case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
 515   case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
 516   case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
 517   case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
 518
 519   case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
 520   case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
 521   case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
 522   case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
 523   case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
 524   case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
 525   case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
 526   case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
 527
 528   case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
 529   case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
 530   case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
 531   case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
 532   case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
 533   case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
 534   case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
 535   case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
 536
 537   case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
 538   case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
 539   case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
 540   case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
 541   case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
 542   case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
 543   case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
 544   case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
 545   }
 546   return Opc; // If not one we handle, return it unchanged.
 547 }
 548
 549 SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
 550                                        bool isUpdating,
 551                                        const uint16_t *Opcodes) {
 552   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
 553
 554   EVT VT = N->getValueType(0);
 555   unsigned OpcodeIndex;
 556   switch (VT.getSimpleVT().SimpleTy) {
 557   default: llvm_unreachable("unhandled vector load type");
 558   case MVT::v8i8:  OpcodeIndex = 0; break;
 559   case MVT::v4i16: OpcodeIndex = 1; break;
 560   case MVT::v2f32:
 561   case MVT::v2i32: OpcodeIndex = 2; break;
 562   case MVT::v1f64:
 563   case MVT::v1i64: OpcodeIndex = 3; break;
 564   case MVT::v16i8: OpcodeIndex = 4; break;
 565   case MVT::v8f16:
 566   case MVT::v8i16: OpcodeIndex = 5; break;
 567   case MVT::v4f32:
 568   case MVT::v4i32: OpcodeIndex = 6; break;
 569   case MVT::v2f64:
 570   case MVT::v2i64: OpcodeIndex = 7; break;
 571   }
 572   unsigned Opc = Opcodes[OpcodeIndex];
 573
 574   SmallVector<SDValue, 2> Ops;
 575   unsigned AddrOpIdx = isUpdating ? 1 : 2;
 576   Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
 577
 578   if (isUpdating) {
 579     SDValue Inc = N->getOperand(AddrOpIdx + 1);
 580     if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
 581       Opc = getVLDSTRegisterUpdateOpcode(Opc);
 582     Ops.push_back(Inc);
 583   }
 584
 585   Ops.push_back(N->getOperand(0)); // Push back the Chain
 586
 587   std::vector<EVT> ResTys;
 588   bool is64BitVector = VT.is64BitVector();
 589
 590   if (NumVecs == 1)
 591     ResTys.push_back(VT);
 592   else if (NumVecs == 3)
 593     ResTys.push_back(MVT::Untyped);
 594   else {
 595     EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
 596                                  is64BitVector ? NumVecs : NumVecs * 2);
 597     ResTys.push_back(ResTy);
 598   }
 599
 600   if (isUpdating)
 601     ResTys.push_back(MVT::i64); // Type of the updated register
 602   ResTys.push_back(MVT::Other); // Type of the Chain
 603   SDLoc dl(N);
 604   SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 605
 606   // Transfer memoperands.
 607   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
 608   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 609   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
 610
 611   if (NumVecs == 1)
 612     return VLd;
 613
 614   // If NumVecs > 1, the return result is a super register containing 2-4
 615   // consecutive vector registers.
 616   SDValue SuperReg = SDValue(VLd, 0);
 617
 618   unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
 619   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
 620     ReplaceUses(SDValue(N, Vec),
 621                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
 622   // Update users of the Chain
 623   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
 624   if (isUpdating)
 625     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
 626
 627   return NULL;
 628 }
 629
 630 SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
 631                                        bool isUpdating,
 632                                        const uint16_t *Opcodes) {
 633   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
 634   SDLoc dl(N);
 635
 636   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
 637   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 638
 639   unsigned AddrOpIdx = isUpdating ? 1 : 2;
 640   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
 641   EVT VT = N->getOperand(Vec0Idx).getValueType();
 642   unsigned OpcodeIndex;
 643   switch (VT.getSimpleVT().SimpleTy) {
 644   default: llvm_unreachable("unhandled vector store type");
 645   case MVT::v8i8:  OpcodeIndex = 0; break;
 646   case MVT::v4i16: OpcodeIndex = 1; break;
 647   case MVT::v2f32:
 648   case MVT::v2i32: OpcodeIndex = 2; break;
 649   case MVT::v1f64:
 650   case MVT::v1i64: OpcodeIndex = 3; break;
 651   case MVT::v16i8: OpcodeIndex = 4; break;
 652   case MVT::v8f16:
 653   case MVT::v8i16: OpcodeIndex = 5; break;
 654   case MVT::v4f32:
 655   case MVT::v4i32: OpcodeIndex = 6; break;
 656   case MVT::v2f64:
 657   case MVT::v2i64: OpcodeIndex = 7; break;
 658   }
 659   unsigned Opc = Opcodes[OpcodeIndex];
 660
 661   std::vector<EVT> ResTys;
 662   if (isUpdating)
 663     ResTys.push_back(MVT::i64);
 664   ResTys.push_back(MVT::Other); // Type for the Chain
 665
 666   SmallVector<SDValue, 6> Ops;
 667   Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
 668
 669   if (isUpdating) {
 670     SDValue Inc = N->getOperand(AddrOpIdx + 1);
 671     if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
 672       Opc = getVLDSTRegisterUpdateOpcode(Opc);
 673     Ops.push_back(Inc);
 674   }
 675   bool is64BitVector = VT.is64BitVector();
 676
 677   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
 678                                N->op_begin() + Vec0Idx + NumVecs);
 679   SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
 680   Ops.push_back(SrcReg);
 681
 682   // Push back the Chain
 683   Ops.push_back(N->getOperand(0));
 684
 685   // Transfer memoperands.
 686   SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 687   cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
 688
 689   return VSt;
 690 }
 691
 692 unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
 693                                         unsigned NumOfVec) {
 694   assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");
 695
 696   unsigned Opc = 0;
 697   switch (NumOfVec) {
 698   default:
 699     break;
 700   case 1:
 701     if (IsExt)
 702       Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
 703     else
 704       Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
 705     break;
 706   case 2:
 707     if (IsExt)
 708       Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
 709     else
 710       Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
 711     break;
 712   case 3:
 713     if (IsExt)
 714       Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
 715     else
 716       Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
 717     break;
 718   case 4:
 719     if (IsExt)
 720       Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
 721     else
 722       Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
 723     break;
 724   }
 725
 726   return Opc;
 727 }
 728
 729 SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
 730                                         bool IsExt) {
 731   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
 732   SDLoc dl(N);
 733
 734   // Check the element of look up table is 64-bit or not
 735   unsigned Vec0Idx = IsExt ? 2 : 1;
 736   SDValue V0 = N->getOperand(Vec0Idx + 0);
 737   EVT VT = V0.getValueType();
 738   assert(!VT.is64BitVector() &&
 739          "The element of lookup table for vtbl and vtbx must be 128-bit");
 740
 741   // Check the return value type is 64-bit or not
 742   EVT ResVT = N->getValueType(0);
 743   bool is64BitRes = ResVT.is64BitVector();
 744
 745   // Create new SDValue for vector list
 746   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
 747                                N->op_begin() + Vec0Idx + NumVecs);
 748   SDValue TblReg = createQTuple(Regs);
 749   unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
 750
 751   SmallVector<SDValue, 3> Ops;
 752   if (IsExt)
 753     Ops.push_back(N->getOperand(1));
 754   Ops.push_back(TblReg);
 755   Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
 756   return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
 757 }
 758
 759 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
 760   // Dump information about the Node being selected
 761   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
 762
 763   if (Node->isMachineOpcode()) {
 764     DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
 765     Node->setNodeId(-1);
 766     return NULL;
 767   }
 768
 769   switch (Node->getOpcode()) {
 770   case ISD::ATOMIC_LOAD_ADD:
 771     return SelectAtomic(Node,
 772                         AArch64::ATOMIC_LOAD_ADD_I8,
 773                         AArch64::ATOMIC_LOAD_ADD_I16,
 774                         AArch64::ATOMIC_LOAD_ADD_I32,
 775                         AArch64::ATOMIC_LOAD_ADD_I64);
 776   case ISD::ATOMIC_LOAD_SUB:
 777     return SelectAtomic(Node,
 778                         AArch64::ATOMIC_LOAD_SUB_I8,
 779                         AArch64::ATOMIC_LOAD_SUB_I16,
 780                         AArch64::ATOMIC_LOAD_SUB_I32,
 781                         AArch64::ATOMIC_LOAD_SUB_I64);
 782   case ISD::ATOMIC_LOAD_AND:
 783     return SelectAtomic(Node,
 784                         AArch64::ATOMIC_LOAD_AND_I8,
 785                         AArch64::ATOMIC_LOAD_AND_I16,
 786                         AArch64::ATOMIC_LOAD_AND_I32,
 787                         AArch64::ATOMIC_LOAD_AND_I64);
 788   case ISD::ATOMIC_LOAD_OR:
 789     return SelectAtomic(Node,
 790                         AArch64::ATOMIC_LOAD_OR_I8,
 791                         AArch64::ATOMIC_LOAD_OR_I16,
 792                         AArch64::ATOMIC_LOAD_OR_I32,
 793                         AArch64::ATOMIC_LOAD_OR_I64);
 794   case ISD::ATOMIC_LOAD_XOR:
 795     return SelectAtomic(Node,
 796                         AArch64::ATOMIC_LOAD_XOR_I8,
 797                         AArch64::ATOMIC_LOAD_XOR_I16,
 798                         AArch64::ATOMIC_LOAD_XOR_I32,
 799                         AArch64::ATOMIC_LOAD_XOR_I64);
 800   case ISD::ATOMIC_LOAD_NAND:
 801     return SelectAtomic(Node,
 802                         AArch64::ATOMIC_LOAD_NAND_I8,
 803                         AArch64::ATOMIC_LOAD_NAND_I16,
 804                         AArch64::ATOMIC_LOAD_NAND_I32,
 805                         AArch64::ATOMIC_LOAD_NAND_I64);
 806   case ISD::ATOMIC_LOAD_MIN:
 807     return SelectAtomic(Node,
 808                         AArch64::ATOMIC_LOAD_MIN_I8,
 809                         AArch64::ATOMIC_LOAD_MIN_I16,
 810                         AArch64::ATOMIC_LOAD_MIN_I32,
 811                         AArch64::ATOMIC_LOAD_MIN_I64);
 812   case ISD::ATOMIC_LOAD_MAX:
 813     return SelectAtomic(Node,
 814                         AArch64::ATOMIC_LOAD_MAX_I8,
 815                         AArch64::ATOMIC_LOAD_MAX_I16,
 816                         AArch64::ATOMIC_LOAD_MAX_I32,
 817                         AArch64::ATOMIC_LOAD_MAX_I64);
 818   case ISD::ATOMIC_LOAD_UMIN:
 819     return SelectAtomic(Node,
 820                         AArch64::ATOMIC_LOAD_UMIN_I8,
 821                         AArch64::ATOMIC_LOAD_UMIN_I16,
 822                         AArch64::ATOMIC_LOAD_UMIN_I32,
 823                         AArch64::ATOMIC_LOAD_UMIN_I64);
 824   case ISD::ATOMIC_LOAD_UMAX:
 825     return SelectAtomic(Node,
 826                         AArch64::ATOMIC_LOAD_UMAX_I8,
 827                         AArch64::ATOMIC_LOAD_UMAX_I16,
 828                         AArch64::ATOMIC_LOAD_UMAX_I32,
 829                         AArch64::ATOMIC_LOAD_UMAX_I64);
 830   case ISD::ATOMIC_SWAP:
 831     return SelectAtomic(Node,
 832                         AArch64::ATOMIC_SWAP_I8,
 833                         AArch64::ATOMIC_SWAP_I16,
 834                         AArch64::ATOMIC_SWAP_I32,
 835                         AArch64::ATOMIC_SWAP_I64);
 836   case ISD::ATOMIC_CMP_SWAP:
 837     return SelectAtomic(Node,
 838                         AArch64::ATOMIC_CMP_SWAP_I8,
 839                         AArch64::ATOMIC_CMP_SWAP_I16,
 840                         AArch64::ATOMIC_CMP_SWAP_I32,
 841                         AArch64::ATOMIC_CMP_SWAP_I64);
 842   case ISD::FrameIndex: {
 843     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
 844     EVT PtrTy = getTargetLowering()->getPointerTy();
 845     SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
 846     return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
 847                                 TFI, CurDAG->getTargetConstant(0, PtrTy));
 848   }
 849   case ISD::ConstantPool: {
 850     // Constant pools are fine, just create a Target entry.
 851     ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
 852     const Constant *C = CN->getConstVal();
 853     SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
 854
 855     ReplaceUses(SDValue(Node, 0), CP);
 856     return NULL;
 857   }
 858   case ISD::Constant: {
 859     SDNode *ResNode = 0;
 860     if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
 861       // XZR and WZR are probably even better than an actual move: most of the
 862       // time they can be folded into another instruction with *no* cost.
 863
 864       EVT Ty = Node->getValueType(0);
 865       assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
 866       uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
 867       ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 868                                        SDLoc(Node),
 869                                        Register, Ty).getNode();
 870     }
 871
 872     // Next best option is a move-immediate, see if we can do that.
 873     if (!ResNode) {
 874       ResNode = TrySelectToMoveImm(Node);
 875     }
 876
 877     if (ResNode)
 878       return ResNode;
 879
 880     // If even that fails we fall back to a lit-pool entry at the moment. Future
 881     // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
 882     ResNode = SelectToLitPool(Node);
 883     assert(ResNode && "We need *some* way to materialise a constant");
 884
 885     // We want to continue selection at this point since the litpool access
 886     // generated used generic nodes for simplicity.
 887     ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
 888     Node = ResNode;
 889     break;
 890   }
 891   case ISD::ConstantFP: {
 892     if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
 893       // FMOV will take care of it from TableGen
 894       break;
 895     }
 896
 897     SDNode *ResNode = LowerToFPLitPool(Node);
 898     ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
 899
 900     // We want to continue selection at this point since the litpool access
 901     // generated used generic nodes for simplicity.
 902     Node = ResNode;
 903     break;
 904   }
 905   case AArch64ISD::NEON_LD1_UPD: {
 906     static const uint16_t Opcodes[] = {
 907       AArch64::LD1WB_8B_fixed,  AArch64::LD1WB_4H_fixed,
 908       AArch64::LD1WB_2S_fixed,  AArch64::LD1WB_1D_fixed,
 909       AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
 910       AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
 911     };
 912     return SelectVLD(Node, 1, true, Opcodes);
 913   }
 914   case AArch64ISD::NEON_LD2_UPD: {
 915     static const uint16_t Opcodes[] = {
 916       AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
 917       AArch64::LD2WB_2S_fixed,  AArch64::LD1WB2V_1D_fixed,
 918       AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
 919       AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
 920     };
 921     return SelectVLD(Node, 2, true, Opcodes);
 922   }
 923   case AArch64ISD::NEON_LD3_UPD: {
 924     static const uint16_t Opcodes[] = {
 925       AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
 926       AArch64::LD3WB_2S_fixed,  AArch64::LD1WB3V_1D_fixed,
 927       AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
 928       AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
 929     };
 930     return SelectVLD(Node, 3, true, Opcodes);
 931   }
 932   case AArch64ISD::NEON_LD4_UPD: {
 933     static const uint16_t Opcodes[] = {
 934       AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
 935       AArch64::LD4WB_2S_fixed,  AArch64::LD1WB4V_1D_fixed,
 936       AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
 937       AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
 938     };
 939     return SelectVLD(Node, 4, true, Opcodes);
 940   }
 941   case AArch64ISD::NEON_ST1_UPD: {
 942     static const uint16_t Opcodes[] = {
 943       AArch64::ST1WB_8B_fixed,  AArch64::ST1WB_4H_fixed,
 944       AArch64::ST1WB_2S_fixed,  AArch64::ST1WB_1D_fixed,
 945       AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
 946       AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
 947     };
 948     return SelectVST(Node, 1, true, Opcodes);
 949   }
 950   case AArch64ISD::NEON_ST2_UPD: {
 951     static const uint16_t Opcodes[] = {
 952       AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
 953       AArch64::ST2WB_2S_fixed,  AArch64::ST1WB2V_1D_fixed,
 954       AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
 955       AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
 956     };
 957     return SelectVST(Node, 2, true, Opcodes);
 958   }
 959   case AArch64ISD::NEON_ST3_UPD: {
 960     static const uint16_t Opcodes[] = {
 961       AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
 962       AArch64::ST3WB_2S_fixed,  AArch64::ST1WB3V_1D_fixed,
 963       AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
 964       AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
 965     };
 966     return SelectVST(Node, 3, true, Opcodes);
 967   }
 968   case AArch64ISD::NEON_ST4_UPD: {
 969     static const uint16_t Opcodes[] = {
 970       AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
 971       AArch64::ST4WB_2S_fixed,  AArch64::ST1WB4V_1D_fixed,
 972       AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
 973       AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
 974     };
 975     return SelectVST(Node, 4, true, Opcodes);
 976   }
 977   case ISD::INTRINSIC_WO_CHAIN: {
 978     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
 979     bool IsExt = false;
 980     switch (IntNo) {
 981       default:
 982         break;
 983       case Intrinsic::aarch64_neon_vtbx1:
 984         IsExt = true;
 985       case Intrinsic::aarch64_neon_vtbl1:
 986         return SelectVTBL(Node, 1, IsExt);
 987       case Intrinsic::aarch64_neon_vtbx2:
 988         IsExt = true;
 989       case Intrinsic::aarch64_neon_vtbl2:
 990         return SelectVTBL(Node, 2, IsExt);
 991       case Intrinsic::aarch64_neon_vtbx3:
 992         IsExt = true;
 993       case Intrinsic::aarch64_neon_vtbl3:
 994         return SelectVTBL(Node, 3, IsExt);
 995       case Intrinsic::aarch64_neon_vtbx4:
 996         IsExt = true;
 997       case Intrinsic::aarch64_neon_vtbl4:
 998         return SelectVTBL(Node, 4, IsExt);
 999     }
1000     break;
1001   }
1002   case ISD::INTRINSIC_VOID:
1003   case ISD::INTRINSIC_W_CHAIN: {
1004     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1005     switch (IntNo) {
1006     default:
1007       break;
1008
1009     case Intrinsic::arm_neon_vld1: {
1010       static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H,
1011                                           AArch64::LD1_2S,  AArch64::LD1_1D,
1012                                           AArch64::LD1_16B, AArch64::LD1_8H,
1013                                           AArch64::LD1_4S,  AArch64::LD1_2D };
1014       return SelectVLD(Node, 1, false, Opcodes);
1015     }
1016     case Intrinsic::arm_neon_vld2: {
1017       static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H,
1018                                           AArch64::LD2_2S,  AArch64::LD1_2V_1D,
1019                                           AArch64::LD2_16B, AArch64::LD2_8H,
1020                                           AArch64::LD2_4S,  AArch64::LD2_2D };
1021       return SelectVLD(Node, 2, false, Opcodes);
1022     }
1023     case Intrinsic::arm_neon_vld3: {
1024       static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H,
1025                                           AArch64::LD3_2S,  AArch64::LD1_3V_1D,
1026                                           AArch64::LD3_16B, AArch64::LD3_8H,
1027                                           AArch64::LD3_4S,  AArch64::LD3_2D };
1028       return SelectVLD(Node, 3, false, Opcodes);
1029     }
1030     case Intrinsic::arm_neon_vld4: {
1031       static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H,
1032                                           AArch64::LD4_2S,  AArch64::LD1_4V_1D,
1033                                           AArch64::LD4_16B, AArch64::LD4_8H,
1034                                           AArch64::LD4_4S,  AArch64::LD4_2D };
1035       return SelectVLD(Node, 4, false, Opcodes);
1036     }
1037     case Intrinsic::arm_neon_vst1: {
1038       static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H,
1039                                           AArch64::ST1_2S,  AArch64::ST1_1D,
1040                                           AArch64::ST1_16B, AArch64::ST1_8H,
1041                                           AArch64::ST1_4S,  AArch64::ST1_2D };
1042       return SelectVST(Node, 1, false, Opcodes);
1043     }
1044     case Intrinsic::arm_neon_vst2: {
1045       static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H,
1046                                           AArch64::ST2_2S,  AArch64::ST1_2V_1D,
1047                                           AArch64::ST2_16B, AArch64::ST2_8H,
1048                                           AArch64::ST2_4S,  AArch64::ST2_2D };
1049       return SelectVST(Node, 2, false, Opcodes);
1050     }
1051     case Intrinsic::arm_neon_vst3: {
1052       static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H,
1053                                           AArch64::ST3_2S,  AArch64::ST1_3V_1D,
1054                                           AArch64::ST3_16B, AArch64::ST3_8H,
1055                                           AArch64::ST3_4S,  AArch64::ST3_2D };
1056       return SelectVST(Node, 3, false, Opcodes);
1057     }
1058     case Intrinsic::arm_neon_vst4: {
1059       static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H,
1060                                           AArch64::ST4_2S,  AArch64::ST1_4V_1D,
1061                                           AArch64::ST4_16B, AArch64::ST4_8H,
1062                                           AArch64::ST4_4S,  AArch64::ST4_2D };
1063       return SelectVST(Node, 4, false, Opcodes);
1064     }
1065     }
1066     break;
1067   }
1068   default:
1069     break; // Let generic code handle it
1070   }
1071
1072   SDNode *ResNode = SelectCode(Node);
1073
1074   DEBUG(dbgs() << "=> ";
1075         if (ResNode == NULL || ResNode == Node)
1076           Node->dump(CurDAG);
1077         else
1078           ResNode->dump(CurDAG);
1079         dbgs() << "\n");
1080
1081   return ResNode;
1082 }
1083
1084 /// This pass converts a legalized DAG into a AArch64-specific DAG, ready for
1085 /// instruction scheduling.
1086 FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
1087                                          CodeGenOpt::Level OptLevel) {
1088   return new AArch64DAGToDAGISel(TM, OptLevel);
1089 }