//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-isel"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
//===--------------------------------------------------------------------===//
/// AArch64 specific code to select AArch64 machine instructions for
/// SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  }

  virtual const char *getPassName() const {
    return "AArch64 Instruction Selection";
  }

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"
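
  // Added note (not in the original source): the TableGen-generated matcher
  // pulled in above supplies SelectCode(), which Select() at the bottom of
  // this file falls back to for any node not handled explicitly here.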
  template<unsigned MemSize>
  bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
    if (!CN || CN->getZExtValue() % MemSize != 0
        || CN->getZExtValue() / MemSize > 0xfff)
      return false;

    UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
    return true;
  }

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  /// Used for pre-lowered address-reference nodes, so we already know
  /// the fields match. This operand's job is simply to add an
  /// appropriate shift operand to the MOVZ/MOVK instruction.
  template<unsigned LogShift>
  bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
    Imm = N;
    Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
    return true;
  }

  bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                unsigned RegWidth);

  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    char ConstraintCode,
                                    std::vector<SDValue> &OutOps);

  bool SelectLogicalImm(SDValue N, SDValue &Imm);

  template<unsigned RegWidth>
  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
    return SelectTSTBOperand(N, FixedPos, RegWidth);
  }

  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);

  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
                       unsigned Op64);

  /// Put the given constant into a pool and return a DAG which will give its
  /// address.
  SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);

  SDNode *TrySelectToMoveImm(SDNode *N);
  SDNode *LowerToFPLitPool(SDNode *Node);
  SDNode *SelectToLitPool(SDNode *N);

  SDNode* Select(SDNode*);

  /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
                    const uint16_t *Opcodes);

  /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
                    const uint16_t *Opcodes);

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                      unsigned SubRegs[]);
};
}
bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
  if (!CN) return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers than floating-point.
  bool IsExact;

  // fbits is between 1 and 64 in the worst-case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
  return true;
}
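
// Added commentary on SelectCVTFixedPosOperand above (not in the original
// source): as an illustration, (fp_to_sint (fmul Val, 256.0)) with a 32-bit
// result has a multiplier of exactly 2^8, so FBits == 8 and the complex
// operand is materialised as the constant 64 - 8 == 56, which appears to be
// the form the fixed-point FCVT patterns consume.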
bool
AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                  char ConstraintCode,
                                                  std::vector<SDValue> &OutOps) {
  switch (ConstraintCode) {
  default: llvm_unreachable("Unrecognised AArch64 memory constraint");
  case 'm':
    // FIXME: more freedom is actually permitted for 'm'. We can go
    // hunting for a base and an offset if we want. Of course, since
    // we don't really know how the operand is going to be used we're
    // probably restricted to the load/store pair's simm7 as an offset
    // range anyway.
  case 'Q':
    OutOps.push_back(Op);
  }

  return false;
}
bool
AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
  if (!Imm || !Imm->getValueAPF().isPosZero())
    return false;

  // Doesn't actually carry any information, but keeps TableGen quiet.
  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}
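
// Added note (not in the original source): SelectLogicalImm succeeds only for
// values expressible as an AArch64 "logical immediate", i.e. a replicated,
// rotated run of set bits. For example, 0x00ff00ff00ff00ff on a 64-bit
// operation is accepted and Bits receives the instruction's immediate
// encoding, whereas an arbitrary value such as 0x123456789 is rejected.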
SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
  SDNode *ResNode;
  SDLoc dl(Node);
  EVT DestType = Node->getValueType(0);
  unsigned DestWidth = DestType.getSizeInBits();

  unsigned MOVOpcode;
  EVT MOVType;
  int UImm16, Shift;
  uint32_t LogicalBits;

  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we
    // can use a 32-bit instruction: "movn w0, 0xedbc".
    MOVType = MVT::i32;
    MOVOpcode = AArch64::MOVNwii;
  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
                              CurDAG->getRegister(ZR, DestType),
                              CurDAG->getTargetConstant(LogicalBits, MVT::i32));
  } else {
    // Can't handle it in one instruction. There's scope for permitting two (or
    // more) instructions, but that'll need more thought.
    return NULL;
  }

  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
                                   CurDAG->getTargetConstant(UImm16, MVT::i32),
                                   CurDAG->getTargetConstant(Shift, MVT::i32));

  if (MOVType != DestType) {
    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
                          MVT::i64, MVT::i32, MVT::Other,
                          CurDAG->getTargetConstant(0, MVT::i64),
                          SDValue(ResNode, 0),
                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
  }

  return ResNode;
}
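
// Added note (not in the original source): the checks above give a
// single-instruction materialisation wherever one exists. For instance
// 0x0000000000ab0000 matches isMOVZImm (movz with a 16-bit shift),
// 0xffffffffffff1234 matches isMOVNImm, and a repeating pattern such as
// 0x5555555555555555 is only reachable through the logical-immediate path,
// i.e. an ORR of the immediate into XZR/WZR.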
SDValue
AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
                                                const Constant *CV) {
  EVT PtrVT = getTargetLowering()->getPointerTy();

  switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
  case CodeModel::Small: {
    unsigned Alignment =
      getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
    return CurDAG->getNode(
        AArch64ISD::WrapperSmall, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
        CurDAG->getConstant(Alignment, MVT::i32));
  }
  case CodeModel::Large: {
    SDNode *LitAddr;
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVZxii, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
        CurDAG->getTargetConstant(3, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
        CurDAG->getTargetConstant(2, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
        CurDAG->getTargetConstant(1, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
        CurDAG->getTargetConstant(0, MVT::i32));
    return SDValue(LitAddr, 0);
  }
  default:
    llvm_unreachable("Only small and large code models supported now");
  }
}
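
// Added note (not in the original source): under the large code model the
// sequence built above is a MOVZ of the G3 fragment followed by three MOVKs
// for the G2_NC, G1_NC and G0_NC fragments, i.e. the pool entry's absolute
// address is assembled 16 bits at a time from bits [63:48] down to [15:0].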
SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
  SDLoc DL(Node);
  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
  EVT DestType = Node->getValueType(0);

  // Since we may end up loading a 64-bit constant from a 32-bit entry the
  // constant in the pool may have a different type to the eventual node.
  ISD::LoadExtType Extension;
  EVT MemType;

  assert((DestType == MVT::i64 || DestType == MVT::i32)
         && "Only expect integer constants at the moment");

  if (DestType == MVT::i32) {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i32;
  } else if (UnsignedVal <= UINT32_MAX) {
    Extension = ISD::ZEXTLOAD;
    MemType = MVT::i32;
  } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
    Extension = ISD::SEXTLOAD;
    MemType = MVT::i32;
  } else {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i64;
  }

  Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
                                                  MemType.getSizeInBits()),
                                  UnsignedVal);
  SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());

  return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
                            PoolAddr,
                            MachinePointerInfo::getConstantPool(), MemType,
                            /* isVolatile = */ false,
                            /* isNonTemporal = */ false,
                            Alignment).getNode();
}
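
// Added note (not in the original source): the extension logic above shrinks
// the pool entry where possible. For a 64-bit node, 0x00000000deadbeef fits
// an unsigned 32-bit entry (ZEXTLOAD), 0xffffffffdeadbeef fits a signed one
// (SEXTLOAD), and only values needing all 64 bits fall back to an i64 entry.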
SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
  SDLoc DL(Node);
  const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
  EVT DestType = Node->getValueType(0);

  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
  SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);

  return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
                         MachinePointerInfo::getConstantPool(),
                         /* isVolatile = */ false,
                         /* isNonTemporal = */ false,
                         /* isInvariant = */ true,
                         Alignment).getNode();
}
bool
AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
                                       unsigned RegWidth) {
  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  uint64_t Val = CN->getZExtValue();

  if (!isPowerOf2_64(Val)) return false;

  unsigned TestedBit = Log2_64(Val);
  // Checks above should have guaranteed that we haven't lost information in
  // finding TestedBit, but it must still be in range.
  if (TestedBit >= RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
  return true;
}
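
// Added note (not in the original source): this operand presumably feeds the
// single-bit test patterns (TBZ/TBNZ style). A mask of 0x8, for example, is
// accepted with TestedBit == 3, while a two-bit mask such as 0x6 fails the
// isPowerOf2_64 check.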
SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
                                          unsigned Op16, unsigned Op32,
                                          unsigned Op64) {
  // Mostly direct translation to the given operations, except that we preserve
  // the AtomicOrdering for use later on.
  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
  EVT VT = AN->getMemoryVT();

  unsigned Op;
  if (VT == MVT::i8)
    Op = Op8;
  else if (VT == MVT::i16)
    Op = Op16;
  else if (VT == MVT::i32)
    Op = Op32;
  else if (VT == MVT::i64)
    Op = Op64;
  else
    llvm_unreachable("Unexpected atomic operation");

  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
    Ops.push_back(AN->getOperand(i));

  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
  Ops.push_back(AN->getOperand(0)); // Chain moves to the end

  return CurDAG->SelectNodeTo(Node, Op,
                              AN->getValueType(0), MVT::Other,
                              &Ops[0], Ops.size());
}
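
// Added note (not in the original source): the operand order built above is
// the atomic node's non-chain operands first, then the ordering as an extra
// immediate, with the chain moved to the end. The selected pseudo is
// presumably expanded into a load/store-exclusive loop later in the backend,
// which is why the AtomicOrdering needs to survive selection.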
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
                                    AArch64::DTripleRegClassID,
                                    AArch64::DQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
                                AArch64::dsub_2, AArch64::dsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
                                    AArch64::QTripleRegClassID,
                                    AArch64::QQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
                                AArch64::qsub_2, AArch64::qsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         unsigned RegClassIDs[],
                                         unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's
  // just a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0].getNode());

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}
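
// Added note (not in the original source): for example, two D registers give
// Regs.size() == 2, so RegClassIDs[0] (DPairRegClassID when coming from
// createDTuple) is used and the REG_SEQUENCE places the values in dsub_0 and
// dsub_1; four Q registers would instead use QQuadRegClassID with
// qsub_0..qsub_3.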
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  switch (Opc) {
  default: break;
  case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
  case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
  case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
  case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
  case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
  case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
  case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
  case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;

  case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
  case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
  case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
  case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
  case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
  case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
  case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
  case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;

  case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
  case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
  case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
  case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
  case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
  case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
  case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
  case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;

  case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
  case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
  case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
  case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
  case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
  case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
  case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
  case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;

  case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
  case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
  case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
  case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
  case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
  case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
  case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
  case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;

  case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
  case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
  case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
  case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
  case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
  case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
  case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
  case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;

  case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
  case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
  case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
  case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
  case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
  case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
  case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
  case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;

  case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
  case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
  case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
  case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
  case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
  case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
  case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
  case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
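
// Added note (not in the original source): the "_fixed" opcodes above are the
// post-indexed forms whose increment is an immediate equal to the size of the
// access, while the "_register" forms take the increment in a general-purpose
// register. SelectVLD/SelectVST below switch an updating access over to the
// "_register" variant whenever the increment operand is not a constant.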
SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
                                       bool isUpdating,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");

  EVT VT = N->getValueType(0);
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vector load type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  case MVT::v16i8: OpcodeIndex = 4; break;
  case MVT::v8i16: OpcodeIndex = 5; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 6; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 7; break;
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SmallVector<SDValue, 2> Ops;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }

  Ops.push_back(N->getOperand(0)); // Push back the Chain

  std::vector<EVT> ResTys;
  bool is64BitVector = VT.is64BitVector();

  if (NumVecs == 1)
    ResTys.push_back(VT);
  else if (NumVecs == 3)
    ResTys.push_back(MVT::Untyped);
  else {
    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
                                 is64BitVector ? NumVecs : NumVecs * 2);
    ResTys.push_back(ResTy);
  }

  if (isUpdating)
    ResTys.push_back(MVT::i64); // Type of the updated register
  ResTys.push_back(MVT::Other); // Type of the Chain
  SDLoc dl(N);
  SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // If NumVecs > 1, the return result is a super register containing 2-4
  // consecutive vector registers.
  SDValue SuperReg = SDValue(VLd, 0);

  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Update users of the Chain
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));

  return NULL;
}
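
// Added note (not in the original source): for a multi-vector load such as an
// ld3 of v8i8, the machine node built above produces a single super-register
// result; the ReplaceUses loop then extracts the individual vectors again via
// dsub_0, dsub_1, dsub_2 (or the qsub_* indices for 128-bit vectors), and the
// remaining results of the original node are remapped onto the new node's
// extra results (the chain, plus the updated base register for the
// post-indexed forms).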
SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
                                       bool isUpdating,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vector store type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  case MVT::v16i8: OpcodeIndex = 4; break;
  case MVT::v8i16: OpcodeIndex = 5; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 6; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 7; break;
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i64);
  ResTys.push_back(MVT::Other); // Type for the Chain

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }
  bool is64BitVector = VT.is64BitVector();

  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
  Ops.push_back(SrcReg);

  // Push back the Chain
  Ops.push_back(N->getOperand(0));

  // Transfer memoperands.
  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

  return VSt;
}
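
// Added note (not in the original source): SelectVST mirrors SelectVLD in the
// opposite direction; the vectors being stored are gathered from the
// intrinsic's operands into a single D- or Q-register tuple with
// createDTuple/createQTuple before being handed to the store instruction as
// one source operand.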
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return NULL;
  }

  switch (Node->getOpcode()) {
  case ISD::ATOMIC_LOAD_ADD:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_ADD_I8,
                        AArch64::ATOMIC_LOAD_ADD_I16,
                        AArch64::ATOMIC_LOAD_ADD_I32,
                        AArch64::ATOMIC_LOAD_ADD_I64);
  case ISD::ATOMIC_LOAD_SUB:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_SUB_I8,
                        AArch64::ATOMIC_LOAD_SUB_I16,
                        AArch64::ATOMIC_LOAD_SUB_I32,
                        AArch64::ATOMIC_LOAD_SUB_I64);
  case ISD::ATOMIC_LOAD_AND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_AND_I8,
                        AArch64::ATOMIC_LOAD_AND_I16,
                        AArch64::ATOMIC_LOAD_AND_I32,
                        AArch64::ATOMIC_LOAD_AND_I64);
  case ISD::ATOMIC_LOAD_OR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_OR_I8,
                        AArch64::ATOMIC_LOAD_OR_I16,
                        AArch64::ATOMIC_LOAD_OR_I32,
                        AArch64::ATOMIC_LOAD_OR_I64);
  case ISD::ATOMIC_LOAD_XOR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_XOR_I8,
                        AArch64::ATOMIC_LOAD_XOR_I16,
                        AArch64::ATOMIC_LOAD_XOR_I32,
                        AArch64::ATOMIC_LOAD_XOR_I64);
  case ISD::ATOMIC_LOAD_NAND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_NAND_I8,
                        AArch64::ATOMIC_LOAD_NAND_I16,
                        AArch64::ATOMIC_LOAD_NAND_I32,
                        AArch64::ATOMIC_LOAD_NAND_I64);
  case ISD::ATOMIC_LOAD_MIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MIN_I8,
                        AArch64::ATOMIC_LOAD_MIN_I16,
                        AArch64::ATOMIC_LOAD_MIN_I32,
                        AArch64::ATOMIC_LOAD_MIN_I64);
  case ISD::ATOMIC_LOAD_MAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MAX_I8,
                        AArch64::ATOMIC_LOAD_MAX_I16,
                        AArch64::ATOMIC_LOAD_MAX_I32,
                        AArch64::ATOMIC_LOAD_MAX_I64);
  case ISD::ATOMIC_LOAD_UMIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMIN_I8,
                        AArch64::ATOMIC_LOAD_UMIN_I16,
                        AArch64::ATOMIC_LOAD_UMIN_I32,
                        AArch64::ATOMIC_LOAD_UMIN_I64);
  case ISD::ATOMIC_LOAD_UMAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMAX_I8,
                        AArch64::ATOMIC_LOAD_UMAX_I16,
                        AArch64::ATOMIC_LOAD_UMAX_I32,
                        AArch64::ATOMIC_LOAD_UMAX_I64);
  case ISD::ATOMIC_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_SWAP_I8,
                        AArch64::ATOMIC_SWAP_I16,
                        AArch64::ATOMIC_SWAP_I32,
                        AArch64::ATOMIC_SWAP_I64);
  case ISD::ATOMIC_CMP_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_CMP_SWAP_I8,
                        AArch64::ATOMIC_CMP_SWAP_I16,
                        AArch64::ATOMIC_CMP_SWAP_I32,
                        AArch64::ATOMIC_CMP_SWAP_I64);
  case ISD::FrameIndex: {
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    EVT PtrTy = getTargetLowering()->getPointerTy();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
    return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                TFI, CurDAG->getTargetConstant(0, PtrTy));
  }
  case ISD::ConstantPool: {
    // Constant pools are fine, just create a Target entry.
    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
    const Constant *C = CN->getConstVal();
    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));

    ReplaceUses(SDValue(Node, 0), CP);
    return NULL;
  }
  case ISD::Constant: {
    SDNode *ResNode = NULL;
    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
      // XZR and WZR are probably even better than an actual move: most of the
      // time they can be folded into another instruction with *no* cost.

      EVT Ty = Node->getValueType(0);
      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                       SDLoc(Node),
                                       Register, Ty).getNode();
    }

    // Next best option is a move-immediate, see if we can do that.
    if (!ResNode) {
      ResNode = TrySelectToMoveImm(Node);
    }

    if (ResNode)
      return ResNode;

    // If even that fails we fall back to a lit-pool entry at the moment. Future
    // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
    ResNode = SelectToLitPool(Node);
    assert(ResNode && "We need *some* way to materialise a constant");

    // We want to continue selection at this point since the litpool access
    // we've generated uses generic nodes for simplicity.
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
    Node = ResNode;
    break;
  }
  case ISD::ConstantFP: {
    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
      // FMOV will take care of it from TableGen
      break;
    }

    SDNode *ResNode = LowerToFPLitPool(Node);
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));

    // We want to continue selection at this point since the litpool access
    // we've generated uses generic nodes for simplicity.
    Node = ResNode;
    break;
  }
  case AArch64ISD::NEON_LD1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1WB_8B_fixed,  AArch64::LD1WB_4H_fixed,
      AArch64::LD1WB_2S_fixed,  AArch64::LD1WB_1D_fixed,
      AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
      AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
    };
    return SelectVLD(Node, 1, true, Opcodes);
  }
  case AArch64ISD::NEON_LD2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
      AArch64::LD2WB_2S_fixed,  AArch64::LD1WB2V_1D_fixed,
      AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
      AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
    };
    return SelectVLD(Node, 2, true, Opcodes);
  }
  case AArch64ISD::NEON_LD3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
      AArch64::LD3WB_2S_fixed,  AArch64::LD1WB3V_1D_fixed,
      AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
      AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
    };
    return SelectVLD(Node, 3, true, Opcodes);
  }
  case AArch64ISD::NEON_LD4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
      AArch64::LD4WB_2S_fixed,  AArch64::LD1WB4V_1D_fixed,
      AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
      AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
    };
    return SelectVLD(Node, 4, true, Opcodes);
  }
  case AArch64ISD::NEON_ST1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1WB_8B_fixed,  AArch64::ST1WB_4H_fixed,
      AArch64::ST1WB_2S_fixed,  AArch64::ST1WB_1D_fixed,
      AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
      AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
    };
    return SelectVST(Node, 1, true, Opcodes);
  }
  case AArch64ISD::NEON_ST2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
      AArch64::ST2WB_2S_fixed,  AArch64::ST1WB2V_1D_fixed,
      AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
      AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
    };
    return SelectVST(Node, 2, true, Opcodes);
  }
  case AArch64ISD::NEON_ST3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
      AArch64::ST3WB_2S_fixed,  AArch64::ST1WB3V_1D_fixed,
      AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
      AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
    };
    return SelectVST(Node, 3, true, Opcodes);
  }
  case AArch64ISD::NEON_ST4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
      AArch64::ST4WB_2S_fixed,  AArch64::ST1WB4V_1D_fixed,
      AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
      AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
    };
    return SelectVST(Node, 4, true, Opcodes);
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();

    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H,
                                          AArch64::LD1_2S,  AArch64::LD1_1D,
                                          AArch64::LD1_16B, AArch64::LD1_8H,
                                          AArch64::LD1_4S,  AArch64::LD1_2D };
      return SelectVLD(Node, 1, false, Opcodes);
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H,
                                          AArch64::LD2_2S,  AArch64::LD1_2V_1D,
                                          AArch64::LD2_16B, AArch64::LD2_8H,
                                          AArch64::LD2_4S,  AArch64::LD2_2D };
      return SelectVLD(Node, 2, false, Opcodes);
    }
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H,
                                          AArch64::LD3_2S,  AArch64::LD1_3V_1D,
                                          AArch64::LD3_16B, AArch64::LD3_8H,
                                          AArch64::LD3_4S,  AArch64::LD3_2D };
      return SelectVLD(Node, 3, false, Opcodes);
    }
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H,
                                          AArch64::LD4_2S,  AArch64::LD1_4V_1D,
                                          AArch64::LD4_16B, AArch64::LD4_8H,
                                          AArch64::LD4_4S,  AArch64::LD4_2D };
      return SelectVLD(Node, 4, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H,
                                          AArch64::ST1_2S,  AArch64::ST1_1D,
                                          AArch64::ST1_16B, AArch64::ST1_8H,
                                          AArch64::ST1_4S,  AArch64::ST1_2D };
      return SelectVST(Node, 1, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H,
                                          AArch64::ST2_2S,  AArch64::ST1_2V_1D,
                                          AArch64::ST2_16B, AArch64::ST2_8H,
                                          AArch64::ST2_4S,  AArch64::ST2_2D };
      return SelectVST(Node, 2, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H,
                                          AArch64::ST3_2S,  AArch64::ST1_3V_1D,
                                          AArch64::ST3_16B, AArch64::ST3_8H,
                                          AArch64::ST3_4S,  AArch64::ST3_2D };
      return SelectVST(Node, 3, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H,
                                          AArch64::ST4_2S,  AArch64::ST1_4V_1D,
                                          AArch64::ST4_16B, AArch64::ST4_8H,
                                          AArch64::ST4_4S,  AArch64::ST4_2D };
      return SelectVST(Node, 4, false, Opcodes);
    }
    }
    break;
  }
  default:
    break; // Let generic code handle it
  }
  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << "\n");

  return ResNode;
}

/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
/// instruction scheduling.
FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}