lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Compiler.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetLowering.h"
  36 #include "llvm/Target/TargetOptions.h"
  37
  38 using namespace llvm;
  39
  40 #define DEBUG_TYPE "arm-isel"
  41
  42 static cl::opt<bool>
  43 DisableShifterOp("disable-shifter-op", cl::Hidden,
  44   cl::desc("Disable isel of shifter-op"),
  45   cl::init(false));
  46
  47 static cl::opt<bool>
  48 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  49   cl::desc("Check fp vmla / vmls hazard at isel time"),
  50   cl::init(true));
  51
  52 //===--------------------------------------------------------------------===//
  53 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  54 /// instructions for SelectionDAG operations.
  55 ///
  56 namespace {
  57
  58 enum AddrMode2Type {
  59   AM2_BASE, // Simple AM2 (+-imm12)
  60   AM2_SHOP  // Shifter-op AM2
  61 };
  62
  63 class ARMDAGToDAGISel : public SelectionDAGISel {
  64   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  65   /// make the right decision when generating code for different targets.
  66   const ARMSubtarget *Subtarget;
  67
  68 public:
  69   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  70       : SelectionDAGISel(tm, OptLevel) {}
  71
  72   bool runOnMachineFunction(MachineFunction &MF) override {
  73     // Reset the subtarget each time through.
  74     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  75     SelectionDAGISel::runOnMachineFunction(MF);
  76     return true;
  77   }
  78
  79   const char *getPassName() const override {
  80     return "ARM Instruction Selection";
  81   }
  82
  83   void PreprocessISelDAG() override;
  84
  85   /// getI32Imm - Return a target constant of type i32 with the specified
  86   /// value.
  87   inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
  88     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  89   }
  90
  91   SDNode *Select(SDNode *N) override;
  92
  93
  94   bool hasNoVMLxHazardUse(SDNode *N) const;
  95   bool isShifterOpProfitable(const SDValue &Shift,
  96                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  97   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  98                                SDValue &B, SDValue &C,
  99                                bool CheckProfitability = true);
 100   bool SelectImmShifterOperand(SDValue N, SDValue &A,
 101                                SDValue &B, bool CheckProfitability = true);
 102   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
 103                                     SDValue &B, SDValue &C) {
 104     // Don't apply the profitability check
 105     return SelectRegShifterOperand(N, A, B, C, false);
 106   }
 107   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
 108                                     SDValue &B) {
 109     // Don't apply the profitability check
 110     return SelectImmShifterOperand(N, A, B, false);
 111   }
 112
 113   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 114   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 115
 116   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 117                                       SDValue &Offset, SDValue &Opc);
 118   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 119                            SDValue &Opc) {
 120     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 121   }
 122
 123   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 124                            SDValue &Opc) {
 125     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 126   }
 127
 128   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 129                        SDValue &Opc) {
 130     SelectAddrMode2Worker(N, Base, Offset, Opc);
 131 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 132     // This always matches one way or another.
 133     return true;
 134   }
 135
 136   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 137     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 138     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 139     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 140     return true;
 141   }
 142
 143   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 146                              SDValue &Offset, SDValue &Opc);
 147   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 148                              SDValue &Offset, SDValue &Opc);
 149   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 150   bool SelectAddrMode3(SDValue N, SDValue &Base,
 151                        SDValue &Offset, SDValue &Opc);
 152   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 153                              SDValue &Offset, SDValue &Opc);
 154   bool SelectAddrMode5(SDValue N, SDValue &Base,
 155                        SDValue &Offset);
 156   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 157   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 158
 159   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 160
 161   // Thumb Addressing Modes:
 162   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 163   bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
 164                              unsigned Scale);
 165   bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
 166   bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
 167   bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
 168   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 169                                 SDValue &OffImm);
 170   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 171                                  SDValue &OffImm);
 172   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 173                                  SDValue &OffImm);
 174   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 175                                  SDValue &OffImm);
 176   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 177
 178   // Thumb 2 Addressing Modes:
 179   bool SelectT2ShifterOperandReg(SDValue N,
 180                                  SDValue &BaseReg, SDValue &Opc);
 181   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 182   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 183                             SDValue &OffImm);
 184   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 185                                  SDValue &OffImm);
 186   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 187                              SDValue &OffReg, SDValue &ShImm);
 188   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 189
 190   inline bool is_so_imm(unsigned Imm) const {
 191     return ARM_AM::getSOImmVal(Imm) != -1;
 192   }
 193
 194   inline bool is_so_imm_not(unsigned Imm) const {
 195     return ARM_AM::getSOImmVal(~Imm) != -1;
 196   }
 197
 198   inline bool is_t2_so_imm(unsigned Imm) const {
 199     return ARM_AM::getT2SOImmVal(Imm) != -1;
 200   }
 201
 202   inline bool is_t2_so_imm_not(unsigned Imm) const {
 203     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 204   }
 205
 206   // Include the pieces autogenerated from the target description.
 207 #include "ARMGenDAGISel.inc"
 208
 209 private:
 210   /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
 211   /// ARM.
 212   SDNode *SelectARMIndexedLoad(SDNode *N);
 213   SDNode *SelectT2IndexedLoad(SDNode *N);
 214
 215   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 216   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 217   /// loads of D registers and even subregs and odd subregs of Q registers.
 218   /// For NumVecs <= 2, QOpcodes1 is not used.
 219   SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 220                     const uint16_t *DOpcodes,
 221                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
 222
 223   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 224   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 225   /// stores of D registers and even subregs and odd subregs of Q registers.
 226   /// For NumVecs <= 2, QOpcodes1 is not used.
 227   SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *DOpcodes,
 229                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
 230
 231   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 232   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 233   /// load/store of D registers and Q registers.
 234   SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
 235                           bool isUpdating, unsigned NumVecs,
 236                           const uint16_t *DOpcodes, const uint16_t *QOpcodes);
 237
 238   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 239   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
 240   /// for loading D registers.  (Q registers are not supported.)
 241   SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 242                        const uint16_t *Opcodes);
 243
 244   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 245   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 246   /// generated to force the table registers to be consecutive.
 247   SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 248
 249   /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
 250   SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 251
 252   // Select special operations if node forms integer ABS pattern
 253   SDNode *SelectABSOp(SDNode *N);
 254
 255   SDNode *SelectReadRegister(SDNode *N);
 256   SDNode *SelectWriteRegister(SDNode *N);
 257
 258   SDNode *SelectInlineAsm(SDNode *N);
 259
 260   SDNode *SelectConcatVector(SDNode *N);
 261
 262   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 263   /// inline asm expressions.
 264   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 265                                     std::vector<SDValue> &OutOps) override;
 266
 267   // Form pairs of consecutive R, S, D, or Q registers.
 268   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 269   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 270   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 271   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 272
 273   // Form sequences of 4 consecutive S, D, or Q registers.
 274   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 275   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 276   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 277
 278   // Get the alignment operand for a NEON VLD or VST instruction.
 279   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
 280                         bool is64BitVector);
 281 };
 282 }
 283
 284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 285 /// operand. If so Imm will receive the 32-bit value.
 286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 287   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 288     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 289     return true;
 290   }
 291   return false;
 292 }
 293
 294 // isInt32Immediate - This method tests to see if a constant operand.
 295 // If so Imm will receive the 32 bit value.
 296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 297   return isInt32Immediate(N.getNode(), Imm);
 298 }
 299
 300 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 301 // opcode and that it has a immediate integer right operand.
 302 // If so Imm will receive the 32 bit value.
 303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 304   return N->getOpcode() == Opc &&
 305          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 306 }
 307
 308 /// \brief Check whether a particular node is a constant value representable as
 309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 310 ///
 311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 312 static bool isScaledConstantInRange(SDValue Node, int Scale,
 313                                     int RangeMin, int RangeMax,
 314                                     int &ScaledConstant) {
 315   assert(Scale > 0 && "Invalid scale!");
 316
 317   // Check that this is a constant.
 318   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 319   if (!C)
 320     return false;
 321
 322   ScaledConstant = (int) C->getZExtValue();
 323   if ((ScaledConstant % Scale) != 0)
 324     return false;
 325
 326   ScaledConstant /= Scale;
 327   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 328 }
 329
 330 void ARMDAGToDAGISel::PreprocessISelDAG() {
 331   if (!Subtarget->hasV6T2Ops())
 332     return;
 333
 334   bool isThumb2 = Subtarget->isThumb();
 335   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 336        E = CurDAG->allnodes_end(); I != E; ) {
 337     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 338
 339     if (N->getOpcode() != ISD::ADD)
 340       continue;
 341
 342     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 343     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 344     // trailing zeros, e.g. 1020.
 345     // Transform the expression to
 346     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 347     // of trailing zeros of c2. The left shift would be folded as an shifter
 348     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 349     // node (UBFX).
 350
 351     SDValue N0 = N->getOperand(0);
 352     SDValue N1 = N->getOperand(1);
 353     unsigned And_imm = 0;
 354     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 355       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 356         std::swap(N0, N1);
 357     }
 358     if (!And_imm)
 359       continue;
 360
 361     // Check if the AND mask is an immediate of the form: 000.....1111111100
 362     unsigned TZ = countTrailingZeros(And_imm);
 363     if (TZ != 1 && TZ != 2)
 364       // Be conservative here. Shifter operands aren't always free. e.g. On
 365       // Swift, left shifter operand of 1 / 2 for free but others are not.
 366       // e.g.
 367       //  ubfx   r3, r1, #16, #8
 368       //  ldr.w  r3, [r0, r3, lsl #2]
 369       // vs.
 370       //  mov.w  r9, #1020
 371       //  and.w  r2, r9, r1, lsr #14
 372       //  ldr    r2, [r0, r2]
 373       continue;
 374     And_imm >>= TZ;
 375     if (And_imm & (And_imm + 1))
 376       continue;
 377
 378     // Look for (and (srl X, c1), c2).
 379     SDValue Srl = N1.getOperand(0);
 380     unsigned Srl_imm = 0;
 381     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 382         (Srl_imm <= 2))
 383       continue;
 384
 385     // Make sure first operand is not a shifter operand which would prevent
 386     // folding of the left shift.
 387     SDValue CPTmp0;
 388     SDValue CPTmp1;
 389     SDValue CPTmp2;
 390     if (isThumb2) {
 391       if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
 392         continue;
 393     } else {
 394       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 395           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 396         continue;
 397     }
 398
 399     // Now make the transformation.
 400     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 401                           Srl.getOperand(0),
 402                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 403                                               MVT::i32));
 404     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 405                          Srl,
 406                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 407     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 408                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 409     CurDAG->UpdateNodeOperands(N, N0, N1);
 410   }
 411 }
 412
 413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 415 /// least on current ARM implementations) which should be avoidded.
 416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 417   if (OptLevel == CodeGenOpt::None)
 418     return true;
 419
 420   if (!CheckVMLxHazard)
 421     return true;
 422
 423   if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
 424       !Subtarget->isCortexA9() && !Subtarget->isSwift())
 425     return true;
 426
 427   if (!N->hasOneUse())
 428     return false;
 429
 430   SDNode *Use = *N->use_begin();
 431   if (Use->getOpcode() == ISD::CopyToReg)
 432     return true;
 433   if (Use->isMachineOpcode()) {
 434     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 435         CurDAG->getSubtarget().getInstrInfo());
 436
 437     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 438     if (MCID.mayStore())
 439       return true;
 440     unsigned Opcode = MCID.getOpcode();
 441     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 442       return true;
 443     // vmlx feeding into another vmlx. We actually want to unfold
 444     // the use later in the MLxExpansion pass. e.g.
 445     // vmla
 446     // vmla (stall 8 cycles)
 447     //
 448     // vmul (5 cycles)
 449     // vadd (5 cycles)
 450     // vmla
 451     // This adds up to about 18 - 19 cycles.
 452     //
 453     // vmla
 454     // vmul (stall 4 cycles)
 455     // vadd adds up to about 14 cycles.
 456     return TII->isFpMLxInstruction(Opcode);
 457   }
 458
 459   return false;
 460 }
 461
 462 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 463                                             ARM_AM::ShiftOpc ShOpcVal,
 464                                             unsigned ShAmt) {
 465   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 466     return true;
 467   if (Shift.hasOneUse())
 468     return true;
 469   // R << 2 is free.
 470   return ShOpcVal == ARM_AM::lsl &&
 471          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 472 }
 473
 474 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 475                                               SDValue &BaseReg,
 476                                               SDValue &Opc,
 477                                               bool CheckProfitability) {
 478   if (DisableShifterOp)
 479     return false;
 480
 481   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 482
 483   // Don't match base register only case. That is matched to a separate
 484   // lower complexity pattern with explicit register operand.
 485   if (ShOpcVal == ARM_AM::no_shift) return false;
 486
 487   BaseReg = N.getOperand(0);
 488   unsigned ShImmVal = 0;
 489   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 490   if (!RHS) return false;
 491   ShImmVal = RHS->getZExtValue() & 31;
 492   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 493                                   SDLoc(N), MVT::i32);
 494   return true;
 495 }
 496
 497 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 498                                               SDValue &BaseReg,
 499                                               SDValue &ShReg,
 500                                               SDValue &Opc,
 501                                               bool CheckProfitability) {
 502   if (DisableShifterOp)
 503     return false;
 504
 505   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 506
 507   // Don't match base register only case. That is matched to a separate
 508   // lower complexity pattern with explicit register operand.
 509   if (ShOpcVal == ARM_AM::no_shift) return false;
 510
 511   BaseReg = N.getOperand(0);
 512   unsigned ShImmVal = 0;
 513   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 514   if (RHS) return false;
 515
 516   ShReg = N.getOperand(1);
 517   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 518     return false;
 519   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 520                                   SDLoc(N), MVT::i32);
 521   return true;
 522 }
 523
 524
 525 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 526                                           SDValue &Base,
 527                                           SDValue &OffImm) {
 528   // Match simple R + imm12 operands.
 529
 530   // Base only.
 531   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 532       !CurDAG->isBaseWithConstantOffset(N)) {
 533     if (N.getOpcode() == ISD::FrameIndex) {
 534       // Match frame index.
 535       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 536       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 537       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 538       return true;
 539     }
 540
 541     if (N.getOpcode() == ARMISD::Wrapper &&
 542         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
 543       Base = N.getOperand(0);
 544     } else
 545       Base = N;
 546     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 547     return true;
 548   }
 549
 550   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 551     int RHSC = (int)RHS->getSExtValue();
 552     if (N.getOpcode() == ISD::SUB)
 553       RHSC = -RHSC;
 554
 555     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 556       Base   = N.getOperand(0);
 557       if (Base.getOpcode() == ISD::FrameIndex) {
 558         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 559         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 560       }
 561       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 562       return true;
 563     }
 564   }
 565
 566   // Base only.
 567   Base = N;
 568   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 569   return true;
 570 }
 571
 572
 573
 574 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 575                                       SDValue &Opc) {
 576   if (N.getOpcode() == ISD::MUL &&
 577       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 578     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 579       // X * [3,5,9] -> X + X * [2,4,8] etc.
 580       int RHSC = (int)RHS->getZExtValue();
 581       if (RHSC & 1) {
 582         RHSC = RHSC & ~1;
 583         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 584         if (RHSC < 0) {
 585           AddSub = ARM_AM::sub;
 586           RHSC = - RHSC;
 587         }
 588         if (isPowerOf2_32(RHSC)) {
 589           unsigned ShAmt = Log2_32(RHSC);
 590           Base = Offset = N.getOperand(0);
 591           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 592                                                             ARM_AM::lsl),
 593                                           SDLoc(N), MVT::i32);
 594           return true;
 595         }
 596       }
 597     }
 598   }
 599
 600   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 601       // ISD::OR that is equivalent to an ISD::ADD.
 602       !CurDAG->isBaseWithConstantOffset(N))
 603     return false;
 604
 605   // Leave simple R +/- imm12 operands for LDRi12
 606   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 607     int RHSC;
 608     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 609                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 610       return false;
 611   }
 612
 613   // Otherwise this is R +/- [possibly shifted] R.
 614   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 615   ARM_AM::ShiftOpc ShOpcVal =
 616     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 617   unsigned ShAmt = 0;
 618
 619   Base   = N.getOperand(0);
 620   Offset = N.getOperand(1);
 621
 622   if (ShOpcVal != ARM_AM::no_shift) {
 623     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 624     // it.
 625     if (ConstantSDNode *Sh =
 626            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 627       ShAmt = Sh->getZExtValue();
 628       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 629         Offset = N.getOperand(1).getOperand(0);
 630       else {
 631         ShAmt = 0;
 632         ShOpcVal = ARM_AM::no_shift;
 633       }
 634     } else {
 635       ShOpcVal = ARM_AM::no_shift;
 636     }
 637   }
 638
 639   // Try matching (R shl C) + (R).
 640   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 641       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 642         N.getOperand(0).hasOneUse())) {
 643     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 644     if (ShOpcVal != ARM_AM::no_shift) {
 645       // Check to see if the RHS of the shift is a constant, if not, we can't
 646       // fold it.
 647       if (ConstantSDNode *Sh =
 648           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 649         ShAmt = Sh->getZExtValue();
 650         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 651           Offset = N.getOperand(0).getOperand(0);
 652           Base = N.getOperand(1);
 653         } else {
 654           ShAmt = 0;
 655           ShOpcVal = ARM_AM::no_shift;
 656         }
 657       } else {
 658         ShOpcVal = ARM_AM::no_shift;
 659       }
 660     }
 661   }
 662
 663   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 664                                   SDLoc(N), MVT::i32);
 665   return true;
 666 }
 667
 668
 669 //-----
 670
 671 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 672                                                      SDValue &Base,
 673                                                      SDValue &Offset,
 674                                                      SDValue &Opc) {
 675   if (N.getOpcode() == ISD::MUL &&
 676       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 677     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 678       // X * [3,5,9] -> X + X * [2,4,8] etc.
 679       int RHSC = (int)RHS->getZExtValue();
 680       if (RHSC & 1) {
 681         RHSC = RHSC & ~1;
 682         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 683         if (RHSC < 0) {
 684           AddSub = ARM_AM::sub;
 685           RHSC = - RHSC;
 686         }
 687         if (isPowerOf2_32(RHSC)) {
 688           unsigned ShAmt = Log2_32(RHSC);
 689           Base = Offset = N.getOperand(0);
 690           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 691                                                             ARM_AM::lsl),
 692                                           SDLoc(N), MVT::i32);
 693           return AM2_SHOP;
 694         }
 695       }
 696     }
 697   }
 698
 699   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 700       // ISD::OR that is equivalent to an ADD.
 701       !CurDAG->isBaseWithConstantOffset(N)) {
 702     Base = N;
 703     if (N.getOpcode() == ISD::FrameIndex) {
 704       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 705       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 706     } else if (N.getOpcode() == ARMISD::Wrapper &&
 707                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
 708       Base = N.getOperand(0);
 709     }
 710     Offset = CurDAG->getRegister(0, MVT::i32);
 711     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 712                                                       ARM_AM::no_shift),
 713                                     SDLoc(N), MVT::i32);
 714     return AM2_BASE;
 715   }
 716
 717   // Match simple R +/- imm12 operands.
 718   if (N.getOpcode() != ISD::SUB) {
 719     int RHSC;
 720     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 721                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 722       Base = N.getOperand(0);
 723       if (Base.getOpcode() == ISD::FrameIndex) {
 724         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 725         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 726       }
 727       Offset = CurDAG->getRegister(0, MVT::i32);
 728
 729       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 730       if (RHSC < 0) {
 731         AddSub = ARM_AM::sub;
 732         RHSC = - RHSC;
 733       }
 734       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 735                                                         ARM_AM::no_shift),
 736                                       SDLoc(N), MVT::i32);
 737       return AM2_BASE;
 738     }
 739   }
 740
 741   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 742     // Compute R +/- (R << N) and reuse it.
 743     Base = N;
 744     Offset = CurDAG->getRegister(0, MVT::i32);
 745     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 746                                                       ARM_AM::no_shift),
 747                                     SDLoc(N), MVT::i32);
 748     return AM2_BASE;
 749   }
 750
 751   // Otherwise this is R +/- [possibly shifted] R.
 752   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 753   ARM_AM::ShiftOpc ShOpcVal =
 754     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 755   unsigned ShAmt = 0;
 756
 757   Base   = N.getOperand(0);
 758   Offset = N.getOperand(1);
 759
 760   if (ShOpcVal != ARM_AM::no_shift) {
 761     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 762     // it.
 763     if (ConstantSDNode *Sh =
 764            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 765       ShAmt = Sh->getZExtValue();
 766       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 767         Offset = N.getOperand(1).getOperand(0);
 768       else {
 769         ShAmt = 0;
 770         ShOpcVal = ARM_AM::no_shift;
 771       }
 772     } else {
 773       ShOpcVal = ARM_AM::no_shift;
 774     }
 775   }
 776
 777   // Try matching (R shl C) + (R).
 778   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 779       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 780         N.getOperand(0).hasOneUse())) {
 781     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 782     if (ShOpcVal != ARM_AM::no_shift) {
 783       // Check to see if the RHS of the shift is a constant, if not, we can't
 784       // fold it.
 785       if (ConstantSDNode *Sh =
 786           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 787         ShAmt = Sh->getZExtValue();
 788         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 789           Offset = N.getOperand(0).getOperand(0);
 790           Base = N.getOperand(1);
 791         } else {
 792           ShAmt = 0;
 793           ShOpcVal = ARM_AM::no_shift;
 794         }
 795       } else {
 796         ShOpcVal = ARM_AM::no_shift;
 797       }
 798     }
 799   }
 800
 801   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 802                                   SDLoc(N), MVT::i32);
 803   return AM2_SHOP;
 804 }
 805
 806 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 807                                             SDValue &Offset, SDValue &Opc) {
 808   unsigned Opcode = Op->getOpcode();
 809   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 810     ? cast<LoadSDNode>(Op)->getAddressingMode()
 811     : cast<StoreSDNode>(Op)->getAddressingMode();
 812   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 813     ? ARM_AM::add : ARM_AM::sub;
 814   int Val;
 815   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 816     return false;
 817
 818   Offset = N;
 819   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 820   unsigned ShAmt = 0;
 821   if (ShOpcVal != ARM_AM::no_shift) {
 822     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 823     // it.
 824     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 825       ShAmt = Sh->getZExtValue();
 826       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 827         Offset = N.getOperand(0);
 828       else {
 829         ShAmt = 0;
 830         ShOpcVal = ARM_AM::no_shift;
 831       }
 832     } else {
 833       ShOpcVal = ARM_AM::no_shift;
 834     }
 835   }
 836
 837   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 838                                   SDLoc(N), MVT::i32);
 839   return true;
 840 }
 841
 842 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 843                                             SDValue &Offset, SDValue &Opc) {
 844   unsigned Opcode = Op->getOpcode();
 845   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 846     ? cast<LoadSDNode>(Op)->getAddressingMode()
 847     : cast<StoreSDNode>(Op)->getAddressingMode();
 848   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 849     ? ARM_AM::add : ARM_AM::sub;
 850   int Val;
 851   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 852     if (AddSub == ARM_AM::sub) Val *= -1;
 853     Offset = CurDAG->getRegister(0, MVT::i32);
 854     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 855     return true;
 856   }
 857
 858   return false;
 859 }
 860
 861
 862 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 863                                             SDValue &Offset, SDValue &Opc) {
 864   unsigned Opcode = Op->getOpcode();
 865   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 866     ? cast<LoadSDNode>(Op)->getAddressingMode()
 867     : cast<StoreSDNode>(Op)->getAddressingMode();
 868   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 869     ? ARM_AM::add : ARM_AM::sub;
 870   int Val;
 871   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 872     Offset = CurDAG->getRegister(0, MVT::i32);
 873     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 874                                                       ARM_AM::no_shift),
 875                                     SDLoc(Op), MVT::i32);
 876     return true;
 877   }
 878
 879   return false;
 880 }
 881
 882 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 883   Base = N;
 884   return true;
 885 }
 886
 887 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 888                                       SDValue &Base, SDValue &Offset,
 889                                       SDValue &Opc) {
 890   if (N.getOpcode() == ISD::SUB) {
 891     // X - C  is canonicalize to X + -C, no need to handle it here.
 892     Base = N.getOperand(0);
 893     Offset = N.getOperand(1);
 894     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 895                                     MVT::i32);
 896     return true;
 897   }
 898
 899   if (!CurDAG->isBaseWithConstantOffset(N)) {
 900     Base = N;
 901     if (N.getOpcode() == ISD::FrameIndex) {
 902       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 903       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 904     }
 905     Offset = CurDAG->getRegister(0, MVT::i32);
 906     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 907                                     MVT::i32);
 908     return true;
 909   }
 910
 911   // If the RHS is +/- imm8, fold into addr mode.
 912   int RHSC;
 913   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 914                               -256 + 1, 256, RHSC)) { // 8 bits.
 915     Base = N.getOperand(0);
 916     if (Base.getOpcode() == ISD::FrameIndex) {
 917       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 918       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 919     }
 920     Offset = CurDAG->getRegister(0, MVT::i32);
 921
 922     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 923     if (RHSC < 0) {
 924       AddSub = ARM_AM::sub;
 925       RHSC = -RHSC;
 926     }
 927     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 928                                     MVT::i32);
 929     return true;
 930   }
 931
 932   Base = N.getOperand(0);
 933   Offset = N.getOperand(1);
 934   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 935                                   MVT::i32);
 936   return true;
 937 }
 938
 939 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 940                                             SDValue &Offset, SDValue &Opc) {
 941   unsigned Opcode = Op->getOpcode();
 942   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 943     ? cast<LoadSDNode>(Op)->getAddressingMode()
 944     : cast<StoreSDNode>(Op)->getAddressingMode();
 945   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 946     ? ARM_AM::add : ARM_AM::sub;
 947   int Val;
 948   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 949     Offset = CurDAG->getRegister(0, MVT::i32);
 950     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 951                                     MVT::i32);
 952     return true;
 953   }
 954
 955   Offset = N;
 956   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 957                                   MVT::i32);
 958   return true;
 959 }
 960
 961 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 962                                       SDValue &Base, SDValue &Offset) {
 963   if (!CurDAG->isBaseWithConstantOffset(N)) {
 964     Base = N;
 965     if (N.getOpcode() == ISD::FrameIndex) {
 966       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 967       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 968     } else if (N.getOpcode() == ARMISD::Wrapper &&
 969                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
 970       Base = N.getOperand(0);
 971     }
 972     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 973                                        SDLoc(N), MVT::i32);
 974     return true;
 975   }
 976
 977   // If the RHS is +/- imm8, fold into addr mode.
 978   int RHSC;
 979   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
 980                               -256 + 1, 256, RHSC)) {
 981     Base = N.getOperand(0);
 982     if (Base.getOpcode() == ISD::FrameIndex) {
 983       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 984       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
 985     }
 986
 987     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 988     if (RHSC < 0) {
 989       AddSub = ARM_AM::sub;
 990       RHSC = -RHSC;
 991     }
 992     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 993                                        SDLoc(N), MVT::i32);
 994     return true;
 995   }
 996
 997   Base = N;
 998   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 999                                      SDLoc(N), MVT::i32);
1000   return true;
1001 }
1002
1003 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1004                                       SDValue &Align) {
1005   Addr = N;
1006
1007   unsigned Alignment = 0;
1008
1009   MemSDNode *MemN = cast<MemSDNode>(Parent);
1010
1011   if (isa<LSBaseSDNode>(MemN) ||
1012       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1013         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1014        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1015     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1016     // The maximum alignment is equal to the memory size being referenced.
1017     unsigned MMOAlign = MemN->getAlignment();
1018     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1019     if (MMOAlign >= MemSize && MemSize > 1)
1020       Alignment = MemSize;
1021   } else {
1022     // All other uses of addrmode6 are for intrinsics.  For now just record
1023     // the raw alignment value; it will be refined later based on the legal
1024     // alignment operands for the intrinsic.
1025     Alignment = MemN->getAlignment();
1026   }
1027
1028   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1029   return true;
1030 }
1031
1032 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1033                                             SDValue &Offset) {
1034   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1035   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1036   if (AM != ISD::POST_INC)
1037     return false;
1038   Offset = N;
1039   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1040     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1041       Offset = CurDAG->getRegister(0, MVT::i32);
1042   }
1043   return true;
1044 }
1045
1046 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1047                                        SDValue &Offset, SDValue &Label) {
1048   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1049     Offset = N.getOperand(0);
1050     SDValue N1 = N.getOperand(1);
1051     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1052                                       SDLoc(N), MVT::i32);
1053     return true;
1054   }
1055
1056   return false;
1057 }
1058
1059
1060 //===----------------------------------------------------------------------===//
1061 //                         Thumb Addressing Modes
1062 //===----------------------------------------------------------------------===//
1063
1064 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1065                                             SDValue &Base, SDValue &Offset){
1066   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1067     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1068     if (!NC || !NC->isNullValue())
1069       return false;
1070
1071     Base = Offset = N;
1072     return true;
1073   }
1074
1075   Base = N.getOperand(0);
1076   Offset = N.getOperand(1);
1077   return true;
1078 }
1079
1080 bool
1081 ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
1082                                        SDValue &Offset, unsigned Scale) {
1083   if (Scale == 4) {
1084     SDValue TmpBase, TmpOffImm;
1085     if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1086       return false;  // We want to select tLDRspi / tSTRspi instead.
1087
1088     if (N.getOpcode() == ARMISD::Wrapper &&
1089         N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1090       return false;  // We want to select tLDRpci instead.
1091   }
1092
1093   if (!CurDAG->isBaseWithConstantOffset(N))
1094     return false;
1095
1096   // Thumb does not have [sp, r] address mode.
1097   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1098   RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1099   if ((LHSR && LHSR->getReg() == ARM::SP) ||
1100       (RHSR && RHSR->getReg() == ARM::SP))
1101     return false;
1102
1103   // FIXME: Why do we explicitly check for a match here and then return false?
1104   // Presumably to allow something else to match, but shouldn't this be
1105   // documented?
1106   int RHSC;
1107   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
1108     return false;
1109
1110   Base = N.getOperand(0);
1111   Offset = N.getOperand(1);
1112   return true;
1113 }
1114
1115 bool
1116 ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
1117                                           SDValue &Base,
1118                                           SDValue &Offset) {
1119   return SelectThumbAddrModeRI(N, Base, Offset, 1);
1120 }
1121
1122 bool
1123 ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
1124                                           SDValue &Base,
1125                                           SDValue &Offset) {
1126   return SelectThumbAddrModeRI(N, Base, Offset, 2);
1127 }
1128
1129 bool
1130 ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
1131                                           SDValue &Base,
1132                                           SDValue &Offset) {
1133   return SelectThumbAddrModeRI(N, Base, Offset, 4);
1134 }
1135
1136 bool
1137 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1138                                           SDValue &Base, SDValue &OffImm) {
1139   if (Scale == 4) {
1140     SDValue TmpBase, TmpOffImm;
1141     if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1142       return false;  // We want to select tLDRspi / tSTRspi instead.
1143
1144     if (N.getOpcode() == ARMISD::Wrapper &&
1145         N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1146       return false;  // We want to select tLDRpci instead.
1147   }
1148
1149   if (!CurDAG->isBaseWithConstantOffset(N)) {
1150     if (N.getOpcode() == ARMISD::Wrapper &&
1151         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1152       Base = N.getOperand(0);
1153     } else {
1154       Base = N;
1155     }
1156
1157     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1158     return true;
1159   }
1160
1161   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1162   RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1163   if ((LHSR && LHSR->getReg() == ARM::SP) ||
1164       (RHSR && RHSR->getReg() == ARM::SP)) {
1165     ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
1166     ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1167     unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
1168     unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
1169
1170     // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
1171     if (LHSC != 0 || RHSC != 0) return false;
1172
1173     Base = N;
1174     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1175     return true;
1176   }
1177
1178   // If the RHS is + imm5 * scale, fold into addr mode.
1179   int RHSC;
1180   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1181     Base = N.getOperand(0);
1182     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1183     return true;
1184   }
1185
1186   Base = N.getOperand(0);
1187   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1188   return true;
1189 }
1190
1191 bool
1192 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1193                                            SDValue &OffImm) {
1194   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1195 }
1196
1197 bool
1198 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1199                                            SDValue &OffImm) {
1200   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1201 }
1202
1203 bool
1204 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1205                                            SDValue &OffImm) {
1206   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1207 }
1208
1209 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1210                                             SDValue &Base, SDValue &OffImm) {
1211   if (N.getOpcode() == ISD::FrameIndex) {
1212     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1213     // Only multiples of 4 are allowed for the offset, so the frame object
1214     // alignment must be at least 4.
1215     MachineFrameInfo *MFI = MF->getFrameInfo();
1216     if (MFI->getObjectAlignment(FI) < 4)
1217       MFI->setObjectAlignment(FI, 4);
1218     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
1219     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1220     return true;
1221   }
1222
1223   if (!CurDAG->isBaseWithConstantOffset(N))
1224     return false;
1225
1226   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1227   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1228       (LHSR && LHSR->getReg() == ARM::SP)) {
1229     // If the RHS is + imm8 * scale, fold into addr mode.
1230     int RHSC;
1231     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1232       Base = N.getOperand(0);
1233       if (Base.getOpcode() == ISD::FrameIndex) {
1234         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1235         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1236         // indexed by the LHS must be 4-byte aligned.
1237         MachineFrameInfo *MFI = MF->getFrameInfo();
1238         if (MFI->getObjectAlignment(FI) < 4)
1239           MFI->setObjectAlignment(FI, 4);
1240         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
1241       }
1242       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1243       return true;
1244     }
1245   }
1246
1247   return false;
1248 }
1249
1250
1251 //===----------------------------------------------------------------------===//
1252 //                        Thumb 2 Addressing Modes
1253 //===----------------------------------------------------------------------===//
1254
1255
1256 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1257                                                 SDValue &Opc) {
1258   if (DisableShifterOp)
1259     return false;
1260
1261   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1262
1263   // Don't match base register only case. That is matched to a separate
1264   // lower complexity pattern with explicit register operand.
1265   if (ShOpcVal == ARM_AM::no_shift) return false;
1266
1267   BaseReg = N.getOperand(0);
1268   unsigned ShImmVal = 0;
1269   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1270     ShImmVal = RHS->getZExtValue() & 31;
1271     Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
1272     return true;
1273   }
1274
1275   return false;
1276 }
1277
1278 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1279                                             SDValue &Base, SDValue &OffImm) {
1280   // Match simple R + imm12 operands.
1281
1282   // Base only.
1283   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1284       !CurDAG->isBaseWithConstantOffset(N)) {
1285     if (N.getOpcode() == ISD::FrameIndex) {
1286       // Match frame index.
1287       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1288       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
1289       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290       return true;
1291     }
1292
1293     if (N.getOpcode() == ARMISD::Wrapper &&
1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1295       Base = N.getOperand(0);
1296       if (Base.getOpcode() == ISD::TargetConstantPool)
1297         return false;  // We want to select t2LDRpci instead.
1298     } else
1299       Base = N;
1300     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1301     return true;
1302   }
1303
1304   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1305     if (SelectT2AddrModeImm8(N, Base, OffImm))
1306       // Let t2LDRi8 handle (R - imm8).
1307       return false;
1308
1309     int RHSC = (int)RHS->getZExtValue();
1310     if (N.getOpcode() == ISD::SUB)
1311       RHSC = -RHSC;
1312
1313     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1314       Base   = N.getOperand(0);
1315       if (Base.getOpcode() == ISD::FrameIndex) {
1316         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1317         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
1318       }
1319       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1320       return true;
1321     }
1322   }
1323
1324   // Base only.
1325   Base = N;
1326   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1327   return true;
1328 }
1329
1330 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1331                                            SDValue &Base, SDValue &OffImm) {
1332   // Match simple R - imm8 operands.
1333   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1334       !CurDAG->isBaseWithConstantOffset(N))
1335     return false;
1336
1337   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1338     int RHSC = (int)RHS->getSExtValue();
1339     if (N.getOpcode() == ISD::SUB)
1340       RHSC = -RHSC;
1341
1342     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1343       Base = N.getOperand(0);
1344       if (Base.getOpcode() == ISD::FrameIndex) {
1345         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1346         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
1347       }
1348       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1349       return true;
1350     }
1351   }
1352
1353   return false;
1354 }
1355
1356 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1357                                                  SDValue &OffImm){
1358   unsigned Opcode = Op->getOpcode();
1359   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1360     ? cast<LoadSDNode>(Op)->getAddressingMode()
1361     : cast<StoreSDNode>(Op)->getAddressingMode();
1362   int RHSC;
1363   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1364     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1365       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1366       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1367     return true;
1368   }
1369
1370   return false;
1371 }
1372
1373 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1374                                             SDValue &Base,
1375                                             SDValue &OffReg, SDValue &ShImm) {
1376   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1377   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1378     return false;
1379
1380   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1381   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1382     int RHSC = (int)RHS->getZExtValue();
1383     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1384       return false;
1385     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1386       return false;
1387   }
1388
1389   // Look for (R + R) or (R + (R << [1,2,3])).
1390   unsigned ShAmt = 0;
1391   Base   = N.getOperand(0);
1392   OffReg = N.getOperand(1);
1393
1394   // Swap if it is ((R << c) + R).
1395   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1396   if (ShOpcVal != ARM_AM::lsl) {
1397     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1398     if (ShOpcVal == ARM_AM::lsl)
1399       std::swap(Base, OffReg);
1400   }
1401
1402   if (ShOpcVal == ARM_AM::lsl) {
1403     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1404     // it.
1405     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1406       ShAmt = Sh->getZExtValue();
1407       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1408         OffReg = OffReg.getOperand(0);
1409       else {
1410         ShAmt = 0;
1411       }
1412     }
1413   }
1414
1415   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1416
1417   return true;
1418 }
1419
1420 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1421                                                 SDValue &OffImm) {
1422   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1423   // instructions.
1424   Base = N;
1425   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1426
1427   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1428     return true;
1429
1430   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1431   if (!RHS)
1432     return true;
1433
1434   uint32_t RHSC = (int)RHS->getZExtValue();
1435   if (RHSC > 1020 || RHSC % 4 != 0)
1436     return true;
1437
1438   Base = N.getOperand(0);
1439   if (Base.getOpcode() == ISD::FrameIndex) {
1440     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1441     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
1442   }
1443
1444   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1445   return true;
1446 }
1447
1448 //===--------------------------------------------------------------------===//
1449
1450 /// getAL - Returns a ARMCC::AL immediate node.
1451 static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
1452   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1453 }
1454
1455 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1456   LoadSDNode *LD = cast<LoadSDNode>(N);
1457   ISD::MemIndexedMode AM = LD->getAddressingMode();
1458   if (AM == ISD::UNINDEXED)
1459     return nullptr;
1460
1461   EVT LoadedVT = LD->getMemoryVT();
1462   SDValue Offset, AMOpc;
1463   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1464   unsigned Opcode = 0;
1465   bool Match = false;
1466   if (LoadedVT == MVT::i32 && isPre &&
1467       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1468     Opcode = ARM::LDR_PRE_IMM;
1469     Match = true;
1470   } else if (LoadedVT == MVT::i32 && !isPre &&
1471       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1472     Opcode = ARM::LDR_POST_IMM;
1473     Match = true;
1474   } else if (LoadedVT == MVT::i32 &&
1475       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1476     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1477     Match = true;
1478
1479   } else if (LoadedVT == MVT::i16 &&
1480              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1481     Match = true;
1482     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1483       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1484       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1485   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1486     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1487       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1488         Match = true;
1489         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1490       }
1491     } else {
1492       if (isPre &&
1493           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1494         Match = true;
1495         Opcode = ARM::LDRB_PRE_IMM;
1496       } else if (!isPre &&
1497                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1498         Match = true;
1499         Opcode = ARM::LDRB_POST_IMM;
1500       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1501         Match = true;
1502         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1503       }
1504     }
1505   }
1506
1507   if (Match) {
1508     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1509       SDValue Chain = LD->getChain();
1510       SDValue Base = LD->getBasePtr();
1511       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1512                        CurDAG->getRegister(0, MVT::i32), Chain };
1513       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1514                                     MVT::i32, MVT::Other, Ops);
1515     } else {
1516       SDValue Chain = LD->getChain();
1517       SDValue Base = LD->getBasePtr();
1518       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1519                        CurDAG->getRegister(0, MVT::i32), Chain };
1520       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1521                                     MVT::i32, MVT::Other, Ops);
1522     }
1523   }
1524
1525   return nullptr;
1526 }
1527
1528 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1529   LoadSDNode *LD = cast<LoadSDNode>(N);
1530   ISD::MemIndexedMode AM = LD->getAddressingMode();
1531   if (AM == ISD::UNINDEXED)
1532     return nullptr;
1533
1534   EVT LoadedVT = LD->getMemoryVT();
1535   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1536   SDValue Offset;
1537   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1538   unsigned Opcode = 0;
1539   bool Match = false;
1540   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1541     switch (LoadedVT.getSimpleVT().SimpleTy) {
1542     case MVT::i32:
1543       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1544       break;
1545     case MVT::i16:
1546       if (isSExtLd)
1547         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1548       else
1549         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1550       break;
1551     case MVT::i8:
1552     case MVT::i1:
1553       if (isSExtLd)
1554         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1555       else
1556         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1557       break;
1558     default:
1559       return nullptr;
1560     }
1561     Match = true;
1562   }
1563
1564   if (Match) {
1565     SDValue Chain = LD->getChain();
1566     SDValue Base = LD->getBasePtr();
1567     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1568                      CurDAG->getRegister(0, MVT::i32), Chain };
1569     return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1570                                   MVT::Other, Ops);
1571   }
1572
1573   return nullptr;
1574 }
1575
1576 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1577 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1578   SDLoc dl(V0.getNode());
1579   SDValue RegClass =
1580     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1581   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1582   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1583   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1584   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1585 }
1586
1587 /// \brief Form a D register from a pair of S registers.
1588 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1589   SDLoc dl(V0.getNode());
1590   SDValue RegClass =
1591     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1592   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1593   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1594   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1595   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1596 }
1597
1598 /// \brief Form a quad register from a pair of D registers.
1599 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1600   SDLoc dl(V0.getNode());
1601   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1602                                                MVT::i32);
1603   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1604   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1605   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1606   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1607 }
1608
1609 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1610 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1611   SDLoc dl(V0.getNode());
1612   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1613                                                MVT::i32);
1614   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1615   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1616   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1617   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1618 }
1619
1620 /// \brief Form 4 consecutive S registers.
1621 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1622                                    SDValue V2, SDValue V3) {
1623   SDLoc dl(V0.getNode());
1624   SDValue RegClass =
1625     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1626   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1627   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1628   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1629   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1630   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1631                                     V2, SubReg2, V3, SubReg3 };
1632   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1633 }
1634
1635 /// \brief Form 4 consecutive D registers.
1636 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1637                                    SDValue V2, SDValue V3) {
1638   SDLoc dl(V0.getNode());
1639   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1640                                                MVT::i32);
1641   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1642   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1643   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1644   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1645   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1646                                     V2, SubReg2, V3, SubReg3 };
1647   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1648 }
1649
1650 /// \brief Form 4 consecutive Q registers.
1651 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1652                                    SDValue V2, SDValue V3) {
1653   SDLoc dl(V0.getNode());
1654   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1655                                                MVT::i32);
1656   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1657   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1658   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1659   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1660   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1661                                     V2, SubReg2, V3, SubReg3 };
1662   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1663 }
1664
1665 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1666 /// of a NEON VLD or VST instruction.  The supported values depend on the
1667 /// number of registers being loaded.
1668 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1669                                        unsigned NumVecs, bool is64BitVector) {
1670   unsigned NumRegs = NumVecs;
1671   if (!is64BitVector && NumVecs < 3)
1672     NumRegs *= 2;
1673
1674   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1675   if (Alignment >= 32 && NumRegs == 4)
1676     Alignment = 32;
1677   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1678     Alignment = 16;
1679   else if (Alignment >= 8)
1680     Alignment = 8;
1681   else
1682     Alignment = 0;
1683
1684   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1685 }
1686
1687 static bool isVLDfixed(unsigned Opc)
1688 {
1689   switch (Opc) {
1690   default: return false;
1691   case ARM::VLD1d8wb_fixed : return true;
1692   case ARM::VLD1d16wb_fixed : return true;
1693   case ARM::VLD1d64Qwb_fixed : return true;
1694   case ARM::VLD1d32wb_fixed : return true;
1695   case ARM::VLD1d64wb_fixed : return true;
1696   case ARM::VLD1d64TPseudoWB_fixed : return true;
1697   case ARM::VLD1d64QPseudoWB_fixed : return true;
1698   case ARM::VLD1q8wb_fixed : return true;
1699   case ARM::VLD1q16wb_fixed : return true;
1700   case ARM::VLD1q32wb_fixed : return true;
1701   case ARM::VLD1q64wb_fixed : return true;
1702   case ARM::VLD2d8wb_fixed : return true;
1703   case ARM::VLD2d16wb_fixed : return true;
1704   case ARM::VLD2d32wb_fixed : return true;
1705   case ARM::VLD2q8PseudoWB_fixed : return true;
1706   case ARM::VLD2q16PseudoWB_fixed : return true;
1707   case ARM::VLD2q32PseudoWB_fixed : return true;
1708   case ARM::VLD2DUPd8wb_fixed : return true;
1709   case ARM::VLD2DUPd16wb_fixed : return true;
1710   case ARM::VLD2DUPd32wb_fixed : return true;
1711   }
1712 }
1713
1714 static bool isVSTfixed(unsigned Opc)
1715 {
1716   switch (Opc) {
1717   default: return false;
1718   case ARM::VST1d8wb_fixed : return true;
1719   case ARM::VST1d16wb_fixed : return true;
1720   case ARM::VST1d32wb_fixed : return true;
1721   case ARM::VST1d64wb_fixed : return true;
1722   case ARM::VST1q8wb_fixed : return true;
1723   case ARM::VST1q16wb_fixed : return true;
1724   case ARM::VST1q32wb_fixed : return true;
1725   case ARM::VST1q64wb_fixed : return true;
1726   case ARM::VST1d64TPseudoWB_fixed : return true;
1727   case ARM::VST1d64QPseudoWB_fixed : return true;
1728   case ARM::VST2d8wb_fixed : return true;
1729   case ARM::VST2d16wb_fixed : return true;
1730   case ARM::VST2d32wb_fixed : return true;
1731   case ARM::VST2q8PseudoWB_fixed : return true;
1732   case ARM::VST2q16PseudoWB_fixed : return true;
1733   case ARM::VST2q32PseudoWB_fixed : return true;
1734   }
1735 }
1736
1737 // Get the register stride update opcode of a VLD/VST instruction that
1738 // is otherwise equivalent to the given fixed stride updating instruction.
1739 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1740   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1741     && "Incorrect fixed stride updating instruction.");
1742   switch (Opc) {
1743   default: break;
1744   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1745   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1746   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1747   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1748   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1749   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1750   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1751   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1752   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1753   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1754   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1755   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1756
1757   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1758   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1759   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1760   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1761   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1762   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1763   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1764   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1765   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1766   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1767
1768   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1769   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1770   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1771   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1772   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1773   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1774
1775   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1776   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1777   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1778   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1779   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1780   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1781
1782   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1783   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1784   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1785   }
1786   return Opc; // If not one we handle, return it unchanged.
1787 }
1788
1789 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1790                                    const uint16_t *DOpcodes,
1791                                    const uint16_t *QOpcodes0,
1792                                    const uint16_t *QOpcodes1) {
1793   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1794   SDLoc dl(N);
1795
1796   SDValue MemAddr, Align;
1797   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1798   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1799     return nullptr;
1800
1801   SDValue Chain = N->getOperand(0);
1802   EVT VT = N->getValueType(0);
1803   bool is64BitVector = VT.is64BitVector();
1804   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1805
1806   unsigned OpcodeIndex;
1807   switch (VT.getSimpleVT().SimpleTy) {
1808   default: llvm_unreachable("unhandled vld type");
1809     // Double-register operations:
1810   case MVT::v8i8:  OpcodeIndex = 0; break;
1811   case MVT::v4i16: OpcodeIndex = 1; break;
1812   case MVT::v2f32:
1813   case MVT::v2i32: OpcodeIndex = 2; break;
1814   case MVT::v1i64: OpcodeIndex = 3; break;
1815     // Quad-register operations:
1816   case MVT::v16i8: OpcodeIndex = 0; break;
1817   case MVT::v8i16: OpcodeIndex = 1; break;
1818   case MVT::v4f32:
1819   case MVT::v4i32: OpcodeIndex = 2; break;
1820   case MVT::v2f64:
1821   case MVT::v2i64: OpcodeIndex = 3;
1822     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1823     break;
1824   }
1825
1826   EVT ResTy;
1827   if (NumVecs == 1)
1828     ResTy = VT;
1829   else {
1830     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1831     if (!is64BitVector)
1832       ResTyElts *= 2;
1833     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1834   }
1835   std::vector<EVT> ResTys;
1836   ResTys.push_back(ResTy);
1837   if (isUpdating)
1838     ResTys.push_back(MVT::i32);
1839   ResTys.push_back(MVT::Other);
1840
1841   SDValue Pred = getAL(CurDAG, dl);
1842   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1843   SDNode *VLd;
1844   SmallVector<SDValue, 7> Ops;
1845
1846   // Double registers and VLD1/VLD2 quad registers are directly supported.
1847   if (is64BitVector || NumVecs <= 2) {
1848     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1849                     QOpcodes0[OpcodeIndex]);
1850     Ops.push_back(MemAddr);
1851     Ops.push_back(Align);
1852     if (isUpdating) {
1853       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1854       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1855       // case entirely when the rest are updated to that form, too.
1856       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1857         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1858       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1859       // check for that explicitly too. Horribly hacky, but temporary.
1860       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1861           !isa<ConstantSDNode>(Inc.getNode()))
1862         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1863     }
1864     Ops.push_back(Pred);
1865     Ops.push_back(Reg0);
1866     Ops.push_back(Chain);
1867     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1868
1869   } else {
1870     // Otherwise, quad registers are loaded with two separate instructions,
1871     // where one loads the even registers and the other loads the odd registers.
1872     EVT AddrTy = MemAddr.getValueType();
1873
1874     // Load the even subregs.  This is always an updating load, so that it
1875     // provides the address to the second load for the odd subregs.
1876     SDValue ImplDef =
1877       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1878     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1879     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1880                                           ResTy, AddrTy, MVT::Other, OpsA);
1881     Chain = SDValue(VLdA, 2);
1882
1883     // Load the odd subregs.
1884     Ops.push_back(SDValue(VLdA, 1));
1885     Ops.push_back(Align);
1886     if (isUpdating) {
1887       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1888       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1889              "only constant post-increment update allowed for VLD3/4");
1890       (void)Inc;
1891       Ops.push_back(Reg0);
1892     }
1893     Ops.push_back(SDValue(VLdA, 0));
1894     Ops.push_back(Pred);
1895     Ops.push_back(Reg0);
1896     Ops.push_back(Chain);
1897     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1898   }
1899
1900   // Transfer memoperands.
1901   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1902   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1903   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1904
1905   if (NumVecs == 1)
1906     return VLd;
1907
1908   // Extract out the subregisters.
1909   SDValue SuperReg = SDValue(VLd, 0);
1910   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1911          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1912   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1913   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1914     ReplaceUses(SDValue(N, Vec),
1915                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1916   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1917   if (isUpdating)
1918     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1919   return nullptr;
1920 }
1921
1922 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1923                                    const uint16_t *DOpcodes,
1924                                    const uint16_t *QOpcodes0,
1925                                    const uint16_t *QOpcodes1) {
1926   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1927   SDLoc dl(N);
1928
1929   SDValue MemAddr, Align;
1930   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1931   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1932   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1933     return nullptr;
1934
1935   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1936   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1937
1938   SDValue Chain = N->getOperand(0);
1939   EVT VT = N->getOperand(Vec0Idx).getValueType();
1940   bool is64BitVector = VT.is64BitVector();
1941   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1942
1943   unsigned OpcodeIndex;
1944   switch (VT.getSimpleVT().SimpleTy) {
1945   default: llvm_unreachable("unhandled vst type");
1946     // Double-register operations:
1947   case MVT::v8i8:  OpcodeIndex = 0; break;
1948   case MVT::v4i16: OpcodeIndex = 1; break;
1949   case MVT::v2f32:
1950   case MVT::v2i32: OpcodeIndex = 2; break;
1951   case MVT::v1i64: OpcodeIndex = 3; break;
1952     // Quad-register operations:
1953   case MVT::v16i8: OpcodeIndex = 0; break;
1954   case MVT::v8i16: OpcodeIndex = 1; break;
1955   case MVT::v4f32:
1956   case MVT::v4i32: OpcodeIndex = 2; break;
1957   case MVT::v2f64:
1958   case MVT::v2i64: OpcodeIndex = 3;
1959     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1960     break;
1961   }
1962
1963   std::vector<EVT> ResTys;
1964   if (isUpdating)
1965     ResTys.push_back(MVT::i32);
1966   ResTys.push_back(MVT::Other);
1967
1968   SDValue Pred = getAL(CurDAG, dl);
1969   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1970   SmallVector<SDValue, 7> Ops;
1971
1972   // Double registers and VST1/VST2 quad registers are directly supported.
1973   if (is64BitVector || NumVecs <= 2) {
1974     SDValue SrcReg;
1975     if (NumVecs == 1) {
1976       SrcReg = N->getOperand(Vec0Idx);
1977     } else if (is64BitVector) {
1978       // Form a REG_SEQUENCE to force register allocation.
1979       SDValue V0 = N->getOperand(Vec0Idx + 0);
1980       SDValue V1 = N->getOperand(Vec0Idx + 1);
1981       if (NumVecs == 2)
1982         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1983       else {
1984         SDValue V2 = N->getOperand(Vec0Idx + 2);
1985         // If it's a vst3, form a quad D-register and leave the last part as
1986         // an undef.
1987         SDValue V3 = (NumVecs == 3)
1988           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
1989           : N->getOperand(Vec0Idx + 3);
1990         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1991       }
1992     } else {
1993       // Form a QQ register.
1994       SDValue Q0 = N->getOperand(Vec0Idx);
1995       SDValue Q1 = N->getOperand(Vec0Idx + 1);
1996       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1997     }
1998
1999     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2000                     QOpcodes0[OpcodeIndex]);
2001     Ops.push_back(MemAddr);
2002     Ops.push_back(Align);
2003     if (isUpdating) {
2004       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2005       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2006       // case entirely when the rest are updated to that form, too.
2007       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2008         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2009       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2010       // check for that explicitly too. Horribly hacky, but temporary.
2011       if  (!isa<ConstantSDNode>(Inc.getNode()))
2012         Ops.push_back(Inc);
2013       else if (NumVecs > 2 && !isVSTfixed(Opc))
2014         Ops.push_back(Reg0);
2015     }
2016     Ops.push_back(SrcReg);
2017     Ops.push_back(Pred);
2018     Ops.push_back(Reg0);
2019     Ops.push_back(Chain);
2020     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2021
2022     // Transfer memoperands.
2023     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2024
2025     return VSt;
2026   }
2027
2028   // Otherwise, quad registers are stored with two separate instructions,
2029   // where one stores the even registers and the other stores the odd registers.
2030
2031   // Form the QQQQ REG_SEQUENCE.
2032   SDValue V0 = N->getOperand(Vec0Idx + 0);
2033   SDValue V1 = N->getOperand(Vec0Idx + 1);
2034   SDValue V2 = N->getOperand(Vec0Idx + 2);
2035   SDValue V3 = (NumVecs == 3)
2036     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2037     : N->getOperand(Vec0Idx + 3);
2038   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2039
2040   // Store the even D registers.  This is always an updating store, so that it
2041   // provides the address to the second store for the odd subregs.
2042   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2043   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2044                                         MemAddr.getValueType(),
2045                                         MVT::Other, OpsA);
2046   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2047   Chain = SDValue(VStA, 1);
2048
2049   // Store the odd D registers.
2050   Ops.push_back(SDValue(VStA, 0));
2051   Ops.push_back(Align);
2052   if (isUpdating) {
2053     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2054     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2055            "only constant post-increment update allowed for VST3/4");
2056     (void)Inc;
2057     Ops.push_back(Reg0);
2058   }
2059   Ops.push_back(RegSeq);
2060   Ops.push_back(Pred);
2061   Ops.push_back(Reg0);
2062   Ops.push_back(Chain);
2063   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2064                                         Ops);
2065   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2066   return VStB;
2067 }
2068
2069 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
2070                                          bool isUpdating, unsigned NumVecs,
2071                                          const uint16_t *DOpcodes,
2072                                          const uint16_t *QOpcodes) {
2073   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2074   SDLoc dl(N);
2075
2076   SDValue MemAddr, Align;
2077   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2078   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2079   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2080     return nullptr;
2081
2082   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2083   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2084
2085   SDValue Chain = N->getOperand(0);
2086   unsigned Lane =
2087     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2088   EVT VT = N->getOperand(Vec0Idx).getValueType();
2089   bool is64BitVector = VT.is64BitVector();
2090
2091   unsigned Alignment = 0;
2092   if (NumVecs != 3) {
2093     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2094     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2095     if (Alignment > NumBytes)
2096       Alignment = NumBytes;
2097     if (Alignment < 8 && Alignment < NumBytes)
2098       Alignment = 0;
2099     // Alignment must be a power of two; make sure of that.
2100     Alignment = (Alignment & -Alignment);
2101     if (Alignment == 1)
2102       Alignment = 0;
2103   }
2104   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2105
2106   unsigned OpcodeIndex;
2107   switch (VT.getSimpleVT().SimpleTy) {
2108   default: llvm_unreachable("unhandled vld/vst lane type");
2109     // Double-register operations:
2110   case MVT::v8i8:  OpcodeIndex = 0; break;
2111   case MVT::v4i16: OpcodeIndex = 1; break;
2112   case MVT::v2f32:
2113   case MVT::v2i32: OpcodeIndex = 2; break;
2114     // Quad-register operations:
2115   case MVT::v8i16: OpcodeIndex = 0; break;
2116   case MVT::v4f32:
2117   case MVT::v4i32: OpcodeIndex = 1; break;
2118   }
2119
2120   std::vector<EVT> ResTys;
2121   if (IsLoad) {
2122     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2123     if (!is64BitVector)
2124       ResTyElts *= 2;
2125     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2126                                       MVT::i64, ResTyElts));
2127   }
2128   if (isUpdating)
2129     ResTys.push_back(MVT::i32);
2130   ResTys.push_back(MVT::Other);
2131
2132   SDValue Pred = getAL(CurDAG, dl);
2133   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2134
2135   SmallVector<SDValue, 8> Ops;
2136   Ops.push_back(MemAddr);
2137   Ops.push_back(Align);
2138   if (isUpdating) {
2139     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2140     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2141   }
2142
2143   SDValue SuperReg;
2144   SDValue V0 = N->getOperand(Vec0Idx + 0);
2145   SDValue V1 = N->getOperand(Vec0Idx + 1);
2146   if (NumVecs == 2) {
2147     if (is64BitVector)
2148       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2149     else
2150       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2151   } else {
2152     SDValue V2 = N->getOperand(Vec0Idx + 2);
2153     SDValue V3 = (NumVecs == 3)
2154       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2155       : N->getOperand(Vec0Idx + 3);
2156     if (is64BitVector)
2157       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2158     else
2159       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2160   }
2161   Ops.push_back(SuperReg);
2162   Ops.push_back(getI32Imm(Lane, dl));
2163   Ops.push_back(Pred);
2164   Ops.push_back(Reg0);
2165   Ops.push_back(Chain);
2166
2167   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2168                                   QOpcodes[OpcodeIndex]);
2169   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2170   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2171   if (!IsLoad)
2172     return VLdLn;
2173
2174   // Extract the subregisters.
2175   SuperReg = SDValue(VLdLn, 0);
2176   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
2177          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
2178   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2179   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2180     ReplaceUses(SDValue(N, Vec),
2181                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2182   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2183   if (isUpdating)
2184     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2185   return nullptr;
2186 }
2187
2188 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
2189                                       unsigned NumVecs,
2190                                       const uint16_t *Opcodes) {
2191   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2192   SDLoc dl(N);
2193
2194   SDValue MemAddr, Align;
2195   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2196     return nullptr;
2197
2198   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2199   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2200
2201   SDValue Chain = N->getOperand(0);
2202   EVT VT = N->getValueType(0);
2203
2204   unsigned Alignment = 0;
2205   if (NumVecs != 3) {
2206     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2207     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2208     if (Alignment > NumBytes)
2209       Alignment = NumBytes;
2210     if (Alignment < 8 && Alignment < NumBytes)
2211       Alignment = 0;
2212     // Alignment must be a power of two; make sure of that.
2213     Alignment = (Alignment & -Alignment);
2214     if (Alignment == 1)
2215       Alignment = 0;
2216   }
2217   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2218
2219   unsigned OpcodeIndex;
2220   switch (VT.getSimpleVT().SimpleTy) {
2221   default: llvm_unreachable("unhandled vld-dup type");
2222   case MVT::v8i8:  OpcodeIndex = 0; break;
2223   case MVT::v4i16: OpcodeIndex = 1; break;
2224   case MVT::v2f32:
2225   case MVT::v2i32: OpcodeIndex = 2; break;
2226   }
2227
2228   SDValue Pred = getAL(CurDAG, dl);
2229   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2230   SDValue SuperReg;
2231   unsigned Opc = Opcodes[OpcodeIndex];
2232   SmallVector<SDValue, 6> Ops;
2233   Ops.push_back(MemAddr);
2234   Ops.push_back(Align);
2235   if (isUpdating) {
2236     // fixed-stride update instructions don't have an explicit writeback
2237     // operand. It's implicit in the opcode itself.
2238     SDValue Inc = N->getOperand(2);
2239     if (!isa<ConstantSDNode>(Inc.getNode()))
2240       Ops.push_back(Inc);
2241     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2242     else if (NumVecs > 2)
2243       Ops.push_back(Reg0);
2244   }
2245   Ops.push_back(Pred);
2246   Ops.push_back(Reg0);
2247   Ops.push_back(Chain);
2248
2249   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2250   std::vector<EVT> ResTys;
2251   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2252   if (isUpdating)
2253     ResTys.push_back(MVT::i32);
2254   ResTys.push_back(MVT::Other);
2255   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2256   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2257   SuperReg = SDValue(VLdDup, 0);
2258
2259   // Extract the subregisters.
2260   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2261   unsigned SubIdx = ARM::dsub_0;
2262   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2263     ReplaceUses(SDValue(N, Vec),
2264                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2265   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2266   if (isUpdating)
2267     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2268   return nullptr;
2269 }
2270
2271 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2272                                     unsigned Opc) {
2273   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2274   SDLoc dl(N);
2275   EVT VT = N->getValueType(0);
2276   unsigned FirstTblReg = IsExt ? 2 : 1;
2277
2278   // Form a REG_SEQUENCE to force register allocation.
2279   SDValue RegSeq;
2280   SDValue V0 = N->getOperand(FirstTblReg + 0);
2281   SDValue V1 = N->getOperand(FirstTblReg + 1);
2282   if (NumVecs == 2)
2283     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2284   else {
2285     SDValue V2 = N->getOperand(FirstTblReg + 2);
2286     // If it's a vtbl3, form a quad D-register and leave the last part as
2287     // an undef.
2288     SDValue V3 = (NumVecs == 3)
2289       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2290       : N->getOperand(FirstTblReg + 3);
2291     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2292   }
2293
2294   SmallVector<SDValue, 6> Ops;
2295   if (IsExt)
2296     Ops.push_back(N->getOperand(1));
2297   Ops.push_back(RegSeq);
2298   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2299   Ops.push_back(getAL(CurDAG, dl)); // predicate
2300   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2301   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2302 }
2303
2304 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2305                                                      bool isSigned) {
2306   if (!Subtarget->hasV6T2Ops())
2307     return nullptr;
2308
2309   unsigned Opc = isSigned
2310     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2311     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2312   SDLoc dl(N);
2313
2314   // For unsigned extracts, check for a shift right and mask
2315   unsigned And_imm = 0;
2316   if (N->getOpcode() == ISD::AND) {
2317     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2318
2319       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2320       if (And_imm & (And_imm + 1))
2321         return nullptr;
2322
2323       unsigned Srl_imm = 0;
2324       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2325                                 Srl_imm)) {
2326         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2327
2328         // Note: The width operand is encoded as width-1.
2329         unsigned Width = countTrailingOnes(And_imm) - 1;
2330         unsigned LSB = Srl_imm;
2331
2332         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2333
2334         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2335           // It's cheaper to use a right shift to extract the top bits.
2336           if (Subtarget->isThumb()) {
2337             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2338             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2339                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2340                               getAL(CurDAG, dl), Reg0, Reg0 };
2341             return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2342           }
2343
2344           // ARM models shift instructions as MOVsi with shifter operand.
2345           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2346           SDValue ShOpc =
2347             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2348                                       MVT::i32);
2349           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2350                             getAL(CurDAG, dl), Reg0, Reg0 };
2351           return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2352         }
2353
2354         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2355                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2356                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2357                           getAL(CurDAG, dl), Reg0 };
2358         return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2359       }
2360     }
2361     return nullptr;
2362   }
2363
2364   // Otherwise, we're looking for a shift of a shift
2365   unsigned Shl_imm = 0;
2366   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2367     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2368     unsigned Srl_imm = 0;
2369     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2370       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2371       // Note: The width operand is encoded as width-1.
2372       unsigned Width = 32 - Srl_imm - 1;
2373       int LSB = Srl_imm - Shl_imm;
2374       if (LSB < 0)
2375         return nullptr;
2376       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2377       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2378                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2379                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2380                         getAL(CurDAG, dl), Reg0 };
2381       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2382     }
2383   }
2384
2385   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2386     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2387     unsigned LSB = 0;
2388     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2389         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2390       return nullptr;
2391
2392     if (LSB + Width > 32)
2393       return nullptr;
2394
2395     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2396     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2397                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2398                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2399                       getAL(CurDAG, dl), Reg0 };
2400     return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2401   }
2402
2403   return nullptr;
2404 }
2405
2406 /// Target-specific DAG combining for ISD::XOR.
2407 /// Target-independent combining lowers SELECT_CC nodes of the form
2408 /// select_cc setg[ge] X,  0,  X, -X
2409 /// select_cc setgt    X, -1,  X, -X
2410 /// select_cc setl[te] X,  0, -X,  X
2411 /// select_cc setlt    X,  1, -X,  X
2412 /// which represent Integer ABS into:
2413 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2414 /// ARM instruction selection detects the latter and matches it to
2415 /// ARM::ABS or ARM::t2ABS machine node.
2416 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2417   SDValue XORSrc0 = N->getOperand(0);
2418   SDValue XORSrc1 = N->getOperand(1);
2419   EVT VT = N->getValueType(0);
2420
2421   if (Subtarget->isThumb1Only())
2422     return nullptr;
2423
2424   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2425     return nullptr;
2426
2427   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2428   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2429   SDValue SRASrc0 = XORSrc1.getOperand(0);
2430   SDValue SRASrc1 = XORSrc1.getOperand(1);
2431   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2432   EVT XType = SRASrc0.getValueType();
2433   unsigned Size = XType.getSizeInBits() - 1;
2434
2435   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2436       XType.isInteger() && SRAConstant != nullptr &&
2437       Size == SRAConstant->getZExtValue()) {
2438     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2439     return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2440   }
2441
2442   return nullptr;
2443 }
2444
2445 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2446   // The only time a CONCAT_VECTORS operation can have legal types is when
2447   // two 64-bit vectors are concatenated to a 128-bit vector.
2448   EVT VT = N->getValueType(0);
2449   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2450     llvm_unreachable("unexpected CONCAT_VECTORS");
2451   return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2452 }
2453
2454 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2455   SDLoc dl(N);
2456
2457   if (N->isMachineOpcode()) {
2458     N->setNodeId(-1);
2459     return nullptr;   // Already selected.
2460   }
2461
2462   switch (N->getOpcode()) {
2463   default: break;
2464   case ISD::WRITE_REGISTER: {
2465     SDNode *ResNode = SelectWriteRegister(N);
2466     if (ResNode)
2467       return ResNode;
2468     break;
2469   }
2470   case ISD::READ_REGISTER: {
2471     SDNode *ResNode = SelectReadRegister(N);
2472     if (ResNode)
2473       return ResNode;
2474     break;
2475   }
2476   case ISD::INLINEASM: {
2477     SDNode *ResNode = SelectInlineAsm(N);
2478     if (ResNode)
2479       return ResNode;
2480     break;
2481   }
2482   case ISD::XOR: {
2483     // Select special operations if XOR node forms integer ABS pattern
2484     SDNode *ResNode = SelectABSOp(N);
2485     if (ResNode)
2486       return ResNode;
2487     // Other cases are autogenerated.
2488     break;
2489   }
2490   case ISD::Constant: {
2491     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2492     bool UseCP = true;
2493     if (Subtarget->useMovt(*MF))
2494       // Thumb2-aware targets have the MOVT instruction, so all immediates can
2495       // be done with MOV + MOVT, at worst.
2496       UseCP = false;
2497     else {
2498       if (Subtarget->isThumb()) {
2499         UseCP = (Val > 255 &&                                  // MOV
2500                  ~Val > 255 &&                                 // MOV + MVN
2501                  !ARM_AM::isThumbImmShiftedVal(Val) &&         // MOV + LSL
2502                  !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2503       } else
2504         UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&             // MOV
2505                  ARM_AM::getSOImmVal(~Val) == -1 &&            // MVN
2506                  !ARM_AM::isSOImmTwoPartVal(Val) &&            // two instrs.
2507                  !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2508     }
2509
2510     if (UseCP) {
2511       SDValue CPIdx = CurDAG->getTargetConstantPool(
2512           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2513           TLI->getPointerTy());
2514
2515       SDNode *ResNode;
2516       if (Subtarget->isThumb()) {
2517         SDValue Pred = getAL(CurDAG, dl);
2518         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2519         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2520         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2521                                          Ops);
2522       } else {
2523         SDValue Ops[] = {
2524           CPIdx,
2525           CurDAG->getTargetConstant(0, dl, MVT::i32),
2526           getAL(CurDAG, dl),
2527           CurDAG->getRegister(0, MVT::i32),
2528           CurDAG->getEntryNode()
2529         };
2530         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2531                                        Ops);
2532       }
2533       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2534       return nullptr;
2535     }
2536
2537     // Other cases are autogenerated.
2538     break;
2539   }
2540   case ISD::FrameIndex: {
2541     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2542     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2543     SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
2544     if (Subtarget->isThumb1Only()) {
2545       // Set the alignment of the frame object to 4, to avoid having to generate
2546       // more than one ADD
2547       MachineFrameInfo *MFI = MF->getFrameInfo();
2548       if (MFI->getObjectAlignment(FI) < 4)
2549         MFI->setObjectAlignment(FI, 4);
2550       return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2551                                   CurDAG->getTargetConstant(0, dl, MVT::i32));
2552     } else {
2553       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2554                       ARM::t2ADDri : ARM::ADDri);
2555       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2556                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2557                         CurDAG->getRegister(0, MVT::i32) };
2558       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2559     }
2560   }
2561   case ISD::SRL:
2562     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2563       return I;
2564     break;
2565   case ISD::SIGN_EXTEND_INREG:
2566   case ISD::SRA:
2567     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2568       return I;
2569     break;
2570   case ISD::MUL:
2571     if (Subtarget->isThumb1Only())
2572       break;
2573     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2574       unsigned RHSV = C->getZExtValue();
2575       if (!RHSV) break;
2576       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2577         unsigned ShImm = Log2_32(RHSV-1);
2578         if (ShImm >= 32)
2579           break;
2580         SDValue V = N->getOperand(0);
2581         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2582         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2583         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2584         if (Subtarget->isThumb()) {
2585           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2586           return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2587         } else {
2588           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2589                             Reg0 };
2590           return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2591         }
2592       }
2593       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2594         unsigned ShImm = Log2_32(RHSV+1);
2595         if (ShImm >= 32)
2596           break;
2597         SDValue V = N->getOperand(0);
2598         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2599         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2600         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2601         if (Subtarget->isThumb()) {
2602           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2603           return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2604         } else {
2605           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2606                             Reg0 };
2607           return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2608         }
2609       }
2610     }
2611     break;
2612   case ISD::AND: {
2613     // Check for unsigned bitfield extract
2614     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2615       return I;
2616
2617     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2618     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2619     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2620     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2621     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2622     EVT VT = N->getValueType(0);
2623     if (VT != MVT::i32)
2624       break;
2625     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2626       ? ARM::t2MOVTi16
2627       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2628     if (!Opc)
2629       break;
2630     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2631     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2632     if (!N1C)
2633       break;
2634     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2635       SDValue N2 = N0.getOperand(1);
2636       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2637       if (!N2C)
2638         break;
2639       unsigned N1CVal = N1C->getZExtValue();
2640       unsigned N2CVal = N2C->getZExtValue();
2641       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2642           (N1CVal & 0xffffU) == 0xffffU &&
2643           (N2CVal & 0xffffU) == 0x0U) {
2644         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2645                                                   dl, MVT::i32);
2646         SDValue Ops[] = { N0.getOperand(0), Imm16,
2647                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2648         return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2649       }
2650     }
2651     break;
2652   }
2653   case ARMISD::VMOVRRD:
2654     return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2655                                   N->getOperand(0), getAL(CurDAG, dl),
2656                                   CurDAG->getRegister(0, MVT::i32));
2657   case ISD::UMUL_LOHI: {
2658     if (Subtarget->isThumb1Only())
2659       break;
2660     if (Subtarget->isThumb()) {
2661       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2662                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2663       return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2664     } else {
2665       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2666                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2667                         CurDAG->getRegister(0, MVT::i32) };
2668       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2669                                     ARM::UMULL : ARM::UMULLv5,
2670                                     dl, MVT::i32, MVT::i32, Ops);
2671     }
2672   }
2673   case ISD::SMUL_LOHI: {
2674     if (Subtarget->isThumb1Only())
2675       break;
2676     if (Subtarget->isThumb()) {
2677       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2678                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2679       return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2680     } else {
2681       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2682                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2683                         CurDAG->getRegister(0, MVT::i32) };
2684       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2685                                     ARM::SMULL : ARM::SMULLv5,
2686                                     dl, MVT::i32, MVT::i32, Ops);
2687     }
2688   }
2689   case ARMISD::UMLAL:{
2690     if (Subtarget->isThumb()) {
2691       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2692                         N->getOperand(3), getAL(CurDAG, dl),
2693                         CurDAG->getRegister(0, MVT::i32)};
2694       return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2695     }else{
2696       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2697                         N->getOperand(3), getAL(CurDAG, dl),
2698                         CurDAG->getRegister(0, MVT::i32),
2699                         CurDAG->getRegister(0, MVT::i32) };
2700       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2701                                       ARM::UMLAL : ARM::UMLALv5,
2702                                       dl, MVT::i32, MVT::i32, Ops);
2703     }
2704   }
2705   case ARMISD::SMLAL:{
2706     if (Subtarget->isThumb()) {
2707       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2708                         N->getOperand(3), getAL(CurDAG, dl),
2709                         CurDAG->getRegister(0, MVT::i32)};
2710       return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2711     }else{
2712       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2713                         N->getOperand(3), getAL(CurDAG, dl),
2714                         CurDAG->getRegister(0, MVT::i32),
2715                         CurDAG->getRegister(0, MVT::i32) };
2716       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2717                                       ARM::SMLAL : ARM::SMLALv5,
2718                                       dl, MVT::i32, MVT::i32, Ops);
2719     }
2720   }
2721   case ISD::LOAD: {
2722     SDNode *ResNode = nullptr;
2723     if (Subtarget->isThumb() && Subtarget->hasThumb2())
2724       ResNode = SelectT2IndexedLoad(N);
2725     else
2726       ResNode = SelectARMIndexedLoad(N);
2727     if (ResNode)
2728       return ResNode;
2729     // Other cases are autogenerated.
2730     break;
2731   }
2732   case ARMISD::BRCOND: {
2733     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2734     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2735     // Pattern complexity = 6  cost = 1  size = 0
2736
2737     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2738     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2739     // Pattern complexity = 6  cost = 1  size = 0
2740
2741     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2742     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2743     // Pattern complexity = 6  cost = 1  size = 0
2744
2745     unsigned Opc = Subtarget->isThumb() ?
2746       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2747     SDValue Chain = N->getOperand(0);
2748     SDValue N1 = N->getOperand(1);
2749     SDValue N2 = N->getOperand(2);
2750     SDValue N3 = N->getOperand(3);
2751     SDValue InFlag = N->getOperand(4);
2752     assert(N1.getOpcode() == ISD::BasicBlock);
2753     assert(N2.getOpcode() == ISD::Constant);
2754     assert(N3.getOpcode() == ISD::Register);
2755
2756     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2757                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2758                                MVT::i32);
2759     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2760     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2761                                              MVT::Glue, Ops);
2762     Chain = SDValue(ResNode, 0);
2763     if (N->getNumValues() == 2) {
2764       InFlag = SDValue(ResNode, 1);
2765       ReplaceUses(SDValue(N, 1), InFlag);
2766     }
2767     ReplaceUses(SDValue(N, 0),
2768                 SDValue(Chain.getNode(), Chain.getResNo()));
2769     return nullptr;
2770   }
2771   case ARMISD::VZIP: {
2772     unsigned Opc = 0;
2773     EVT VT = N->getValueType(0);
2774     switch (VT.getSimpleVT().SimpleTy) {
2775     default: return nullptr;
2776     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2777     case MVT::v4i16: Opc = ARM::VZIPd16; break;
2778     case MVT::v2f32:
2779     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2780     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2781     case MVT::v16i8: Opc = ARM::VZIPq8; break;
2782     case MVT::v8i16: Opc = ARM::VZIPq16; break;
2783     case MVT::v4f32:
2784     case MVT::v4i32: Opc = ARM::VZIPq32; break;
2785     }
2786     SDValue Pred = getAL(CurDAG, dl);
2787     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2788     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2789     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2790   }
2791   case ARMISD::VUZP: {
2792     unsigned Opc = 0;
2793     EVT VT = N->getValueType(0);
2794     switch (VT.getSimpleVT().SimpleTy) {
2795     default: return nullptr;
2796     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2797     case MVT::v4i16: Opc = ARM::VUZPd16; break;
2798     case MVT::v2f32:
2799     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2800     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2801     case MVT::v16i8: Opc = ARM::VUZPq8; break;
2802     case MVT::v8i16: Opc = ARM::VUZPq16; break;
2803     case MVT::v4f32:
2804     case MVT::v4i32: Opc = ARM::VUZPq32; break;
2805     }
2806     SDValue Pred = getAL(CurDAG, dl);
2807     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2808     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2809     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2810   }
2811   case ARMISD::VTRN: {
2812     unsigned Opc = 0;
2813     EVT VT = N->getValueType(0);
2814     switch (VT.getSimpleVT().SimpleTy) {
2815     default: return nullptr;
2816     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2817     case MVT::v4i16: Opc = ARM::VTRNd16; break;
2818     case MVT::v2f32:
2819     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2820     case MVT::v16i8: Opc = ARM::VTRNq8; break;
2821     case MVT::v8i16: Opc = ARM::VTRNq16; break;
2822     case MVT::v4f32:
2823     case MVT::v4i32: Opc = ARM::VTRNq32; break;
2824     }
2825     SDValue Pred = getAL(CurDAG, dl);
2826     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2827     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2828     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2829   }
2830   case ARMISD::BUILD_VECTOR: {
2831     EVT VecVT = N->getValueType(0);
2832     EVT EltVT = VecVT.getVectorElementType();
2833     unsigned NumElts = VecVT.getVectorNumElements();
2834     if (EltVT == MVT::f64) {
2835       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2836       return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2837     }
2838     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2839     if (NumElts == 2)
2840       return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2841     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2842     return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2843                      N->getOperand(2), N->getOperand(3));
2844   }
2845
2846   case ARMISD::VLD2DUP: {
2847     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2848                                         ARM::VLD2DUPd32 };
2849     return SelectVLDDup(N, false, 2, Opcodes);
2850   }
2851
2852   case ARMISD::VLD3DUP: {
2853     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2854                                         ARM::VLD3DUPd16Pseudo,
2855                                         ARM::VLD3DUPd32Pseudo };
2856     return SelectVLDDup(N, false, 3, Opcodes);
2857   }
2858
2859   case ARMISD::VLD4DUP: {
2860     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2861                                         ARM::VLD4DUPd16Pseudo,
2862                                         ARM::VLD4DUPd32Pseudo };
2863     return SelectVLDDup(N, false, 4, Opcodes);
2864   }
2865
2866   case ARMISD::VLD2DUP_UPD: {
2867     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2868                                         ARM::VLD2DUPd16wb_fixed,
2869                                         ARM::VLD2DUPd32wb_fixed };
2870     return SelectVLDDup(N, true, 2, Opcodes);
2871   }
2872
2873   case ARMISD::VLD3DUP_UPD: {
2874     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2875                                         ARM::VLD3DUPd16Pseudo_UPD,
2876                                         ARM::VLD3DUPd32Pseudo_UPD };
2877     return SelectVLDDup(N, true, 3, Opcodes);
2878   }
2879
2880   case ARMISD::VLD4DUP_UPD: {
2881     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2882                                         ARM::VLD4DUPd16Pseudo_UPD,
2883                                         ARM::VLD4DUPd32Pseudo_UPD };
2884     return SelectVLDDup(N, true, 4, Opcodes);
2885   }
2886
2887   case ARMISD::VLD1_UPD: {
2888     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2889                                          ARM::VLD1d16wb_fixed,
2890                                          ARM::VLD1d32wb_fixed,
2891                                          ARM::VLD1d64wb_fixed };
2892     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2893                                          ARM::VLD1q16wb_fixed,
2894                                          ARM::VLD1q32wb_fixed,
2895                                          ARM::VLD1q64wb_fixed };
2896     return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2897   }
2898
2899   case ARMISD::VLD2_UPD: {
2900     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2901                                          ARM::VLD2d16wb_fixed,
2902                                          ARM::VLD2d32wb_fixed,
2903                                          ARM::VLD1q64wb_fixed};
2904     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2905                                          ARM::VLD2q16PseudoWB_fixed,
2906                                          ARM::VLD2q32PseudoWB_fixed };
2907     return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2908   }
2909
2910   case ARMISD::VLD3_UPD: {
2911     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2912                                          ARM::VLD3d16Pseudo_UPD,
2913                                          ARM::VLD3d32Pseudo_UPD,
2914                                          ARM::VLD1d64TPseudoWB_fixed};
2915     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2916                                           ARM::VLD3q16Pseudo_UPD,
2917                                           ARM::VLD3q32Pseudo_UPD };
2918     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2919                                           ARM::VLD3q16oddPseudo_UPD,
2920                                           ARM::VLD3q32oddPseudo_UPD };
2921     return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2922   }
2923
2924   case ARMISD::VLD4_UPD: {
2925     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2926                                          ARM::VLD4d16Pseudo_UPD,
2927                                          ARM::VLD4d32Pseudo_UPD,
2928                                          ARM::VLD1d64QPseudoWB_fixed};
2929     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2930                                           ARM::VLD4q16Pseudo_UPD,
2931                                           ARM::VLD4q32Pseudo_UPD };
2932     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2933                                           ARM::VLD4q16oddPseudo_UPD,
2934                                           ARM::VLD4q32oddPseudo_UPD };
2935     return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2936   }
2937
2938   case ARMISD::VLD2LN_UPD: {
2939     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2940                                          ARM::VLD2LNd16Pseudo_UPD,
2941                                          ARM::VLD2LNd32Pseudo_UPD };
2942     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2943                                          ARM::VLD2LNq32Pseudo_UPD };
2944     return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2945   }
2946
2947   case ARMISD::VLD3LN_UPD: {
2948     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2949                                          ARM::VLD3LNd16Pseudo_UPD,
2950                                          ARM::VLD3LNd32Pseudo_UPD };
2951     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2952                                          ARM::VLD3LNq32Pseudo_UPD };
2953     return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2954   }
2955
2956   case ARMISD::VLD4LN_UPD: {
2957     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2958                                          ARM::VLD4LNd16Pseudo_UPD,
2959                                          ARM::VLD4LNd32Pseudo_UPD };
2960     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2961                                          ARM::VLD4LNq32Pseudo_UPD };
2962     return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2963   }
2964
2965   case ARMISD::VST1_UPD: {
2966     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2967                                          ARM::VST1d16wb_fixed,
2968                                          ARM::VST1d32wb_fixed,
2969                                          ARM::VST1d64wb_fixed };
2970     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2971                                          ARM::VST1q16wb_fixed,
2972                                          ARM::VST1q32wb_fixed,
2973                                          ARM::VST1q64wb_fixed };
2974     return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2975   }
2976
2977   case ARMISD::VST2_UPD: {
2978     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2979                                          ARM::VST2d16wb_fixed,
2980                                          ARM::VST2d32wb_fixed,
2981                                          ARM::VST1q64wb_fixed};
2982     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2983                                          ARM::VST2q16PseudoWB_fixed,
2984                                          ARM::VST2q32PseudoWB_fixed };
2985     return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
2986   }
2987
2988   case ARMISD::VST3_UPD: {
2989     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
2990                                          ARM::VST3d16Pseudo_UPD,
2991                                          ARM::VST3d32Pseudo_UPD,
2992                                          ARM::VST1d64TPseudoWB_fixed};
2993     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
2994                                           ARM::VST3q16Pseudo_UPD,
2995                                           ARM::VST3q32Pseudo_UPD };
2996     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
2997                                           ARM::VST3q16oddPseudo_UPD,
2998                                           ARM::VST3q32oddPseudo_UPD };
2999     return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3000   }
3001
3002   case ARMISD::VST4_UPD: {
3003     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3004                                          ARM::VST4d16Pseudo_UPD,
3005                                          ARM::VST4d32Pseudo_UPD,
3006                                          ARM::VST1d64QPseudoWB_fixed};
3007     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3008                                           ARM::VST4q16Pseudo_UPD,
3009                                           ARM::VST4q32Pseudo_UPD };
3010     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3011                                           ARM::VST4q16oddPseudo_UPD,
3012                                           ARM::VST4q32oddPseudo_UPD };
3013     return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3014   }
3015
3016   case ARMISD::VST2LN_UPD: {
3017     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3018                                          ARM::VST2LNd16Pseudo_UPD,
3019                                          ARM::VST2LNd32Pseudo_UPD };
3020     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3021                                          ARM::VST2LNq32Pseudo_UPD };
3022     return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3023   }
3024
3025   case ARMISD::VST3LN_UPD: {
3026     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3027                                          ARM::VST3LNd16Pseudo_UPD,
3028                                          ARM::VST3LNd32Pseudo_UPD };
3029     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3030                                          ARM::VST3LNq32Pseudo_UPD };
3031     return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3032   }
3033
3034   case ARMISD::VST4LN_UPD: {
3035     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3036                                          ARM::VST4LNd16Pseudo_UPD,
3037                                          ARM::VST4LNd32Pseudo_UPD };
3038     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3039                                          ARM::VST4LNq32Pseudo_UPD };
3040     return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3041   }
3042
3043   case ISD::INTRINSIC_VOID:
3044   case ISD::INTRINSIC_W_CHAIN: {
3045     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3046     switch (IntNo) {
3047     default:
3048       break;
3049
3050     case Intrinsic::arm_ldaexd:
3051     case Intrinsic::arm_ldrexd: {
3052       SDLoc dl(N);
3053       SDValue Chain = N->getOperand(0);
3054       SDValue MemAddr = N->getOperand(2);
3055       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3056
3057       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3058       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3059                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3060
3061       // arm_ldrexd returns a i64 value in {i32, i32}
3062       std::vector<EVT> ResTys;
3063       if (isThumb) {
3064         ResTys.push_back(MVT::i32);
3065         ResTys.push_back(MVT::i32);
3066       } else
3067         ResTys.push_back(MVT::Untyped);
3068       ResTys.push_back(MVT::Other);
3069
3070       // Place arguments in the right order.
3071       SmallVector<SDValue, 7> Ops;
3072       Ops.push_back(MemAddr);
3073       Ops.push_back(getAL(CurDAG, dl));
3074       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3075       Ops.push_back(Chain);
3076       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3077       // Transfer memoperands.
3078       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3079       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3080       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3081
3082       // Remap uses.
3083       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3084       if (!SDValue(N, 0).use_empty()) {
3085         SDValue Result;
3086         if (isThumb)
3087           Result = SDValue(Ld, 0);
3088         else {
3089           SDValue SubRegIdx =
3090             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3091           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3092               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3093           Result = SDValue(ResNode,0);
3094         }
3095         ReplaceUses(SDValue(N, 0), Result);
3096       }
3097       if (!SDValue(N, 1).use_empty()) {
3098         SDValue Result;
3099         if (isThumb)
3100           Result = SDValue(Ld, 1);
3101         else {
3102           SDValue SubRegIdx =
3103             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3104           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3105               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3106           Result = SDValue(ResNode,0);
3107         }
3108         ReplaceUses(SDValue(N, 1), Result);
3109       }
3110       ReplaceUses(SDValue(N, 2), OutChain);
3111       return nullptr;
3112     }
3113     case Intrinsic::arm_stlexd:
3114     case Intrinsic::arm_strexd: {
3115       SDLoc dl(N);
3116       SDValue Chain = N->getOperand(0);
3117       SDValue Val0 = N->getOperand(2);
3118       SDValue Val1 = N->getOperand(3);
3119       SDValue MemAddr = N->getOperand(4);
3120
3121       // Store exclusive double return a i32 value which is the return status
3122       // of the issued store.
3123       const EVT ResTys[] = {MVT::i32, MVT::Other};
3124
3125       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3126       // Place arguments in the right order.
3127       SmallVector<SDValue, 7> Ops;
3128       if (isThumb) {
3129         Ops.push_back(Val0);
3130         Ops.push_back(Val1);
3131       } else
3132         // arm_strexd uses GPRPair.
3133         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3134       Ops.push_back(MemAddr);
3135       Ops.push_back(getAL(CurDAG, dl));
3136       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3137       Ops.push_back(Chain);
3138
3139       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3140       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3141                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3142
3143       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3144       // Transfer memoperands.
3145       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3146       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3147       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3148
3149       return St;
3150     }
3151
3152     case Intrinsic::arm_neon_vld1: {
3153       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3154                                            ARM::VLD1d32, ARM::VLD1d64 };
3155       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3156                                            ARM::VLD1q32, ARM::VLD1q64};
3157       return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3158     }
3159
3160     case Intrinsic::arm_neon_vld2: {
3161       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3162                                            ARM::VLD2d32, ARM::VLD1q64 };
3163       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3164                                            ARM::VLD2q32Pseudo };
3165       return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3166     }
3167
3168     case Intrinsic::arm_neon_vld3: {
3169       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3170                                            ARM::VLD3d16Pseudo,
3171                                            ARM::VLD3d32Pseudo,
3172                                            ARM::VLD1d64TPseudo };
3173       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3174                                             ARM::VLD3q16Pseudo_UPD,
3175                                             ARM::VLD3q32Pseudo_UPD };
3176       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3177                                             ARM::VLD3q16oddPseudo,
3178                                             ARM::VLD3q32oddPseudo };
3179       return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3180     }
3181
3182     case Intrinsic::arm_neon_vld4: {
3183       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3184                                            ARM::VLD4d16Pseudo,
3185                                            ARM::VLD4d32Pseudo,
3186                                            ARM::VLD1d64QPseudo };
3187       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3188                                             ARM::VLD4q16Pseudo_UPD,
3189                                             ARM::VLD4q32Pseudo_UPD };
3190       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3191                                             ARM::VLD4q16oddPseudo,
3192                                             ARM::VLD4q32oddPseudo };
3193       return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3194     }
3195
3196     case Intrinsic::arm_neon_vld2lane: {
3197       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3198                                            ARM::VLD2LNd16Pseudo,
3199                                            ARM::VLD2LNd32Pseudo };
3200       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3201                                            ARM::VLD2LNq32Pseudo };
3202       return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3203     }
3204
3205     case Intrinsic::arm_neon_vld3lane: {
3206       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3207                                            ARM::VLD3LNd16Pseudo,
3208                                            ARM::VLD3LNd32Pseudo };
3209       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3210                                            ARM::VLD3LNq32Pseudo };
3211       return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3212     }
3213
3214     case Intrinsic::arm_neon_vld4lane: {
3215       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3216                                            ARM::VLD4LNd16Pseudo,
3217                                            ARM::VLD4LNd32Pseudo };
3218       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3219                                            ARM::VLD4LNq32Pseudo };
3220       return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3221     }
3222
3223     case Intrinsic::arm_neon_vst1: {
3224       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3225                                            ARM::VST1d32, ARM::VST1d64 };
3226       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3227                                            ARM::VST1q32, ARM::VST1q64 };
3228       return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3229     }
3230
3231     case Intrinsic::arm_neon_vst2: {
3232       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3233                                            ARM::VST2d32, ARM::VST1q64 };
3234       static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3235                                      ARM::VST2q32Pseudo };
3236       return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3237     }
3238
3239     case Intrinsic::arm_neon_vst3: {
3240       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3241                                            ARM::VST3d16Pseudo,
3242                                            ARM::VST3d32Pseudo,
3243                                            ARM::VST1d64TPseudo };
3244       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3245                                             ARM::VST3q16Pseudo_UPD,
3246                                             ARM::VST3q32Pseudo_UPD };
3247       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3248                                             ARM::VST3q16oddPseudo,
3249                                             ARM::VST3q32oddPseudo };
3250       return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3251     }
3252
3253     case Intrinsic::arm_neon_vst4: {
3254       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3255                                            ARM::VST4d16Pseudo,
3256                                            ARM::VST4d32Pseudo,
3257                                            ARM::VST1d64QPseudo };
3258       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3259                                             ARM::VST4q16Pseudo_UPD,
3260                                             ARM::VST4q32Pseudo_UPD };
3261       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3262                                             ARM::VST4q16oddPseudo,
3263                                             ARM::VST4q32oddPseudo };
3264       return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3265     }
3266
3267     case Intrinsic::arm_neon_vst2lane: {
3268       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3269                                            ARM::VST2LNd16Pseudo,
3270                                            ARM::VST2LNd32Pseudo };
3271       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3272                                            ARM::VST2LNq32Pseudo };
3273       return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3274     }
3275
3276     case Intrinsic::arm_neon_vst3lane: {
3277       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3278                                            ARM::VST3LNd16Pseudo,
3279                                            ARM::VST3LNd32Pseudo };
3280       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3281                                            ARM::VST3LNq32Pseudo };
3282       return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3283     }
3284
3285     case Intrinsic::arm_neon_vst4lane: {
3286       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3287                                            ARM::VST4LNd16Pseudo,
3288                                            ARM::VST4LNd32Pseudo };
3289       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3290                                            ARM::VST4LNq32Pseudo };
3291       return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3292     }
3293     }
3294     break;
3295   }
3296
3297   case ISD::INTRINSIC_WO_CHAIN: {
3298     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3299     switch (IntNo) {
3300     default:
3301       break;
3302
3303     case Intrinsic::arm_neon_vtbl2:
3304       return SelectVTBL(N, false, 2, ARM::VTBL2);
3305     case Intrinsic::arm_neon_vtbl3:
3306       return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3307     case Intrinsic::arm_neon_vtbl4:
3308       return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3309
3310     case Intrinsic::arm_neon_vtbx2:
3311       return SelectVTBL(N, true, 2, ARM::VTBX2);
3312     case Intrinsic::arm_neon_vtbx3:
3313       return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3314     case Intrinsic::arm_neon_vtbx4:
3315       return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3316     }
3317     break;
3318   }
3319
3320   case ARMISD::VTBL1: {
3321     SDLoc dl(N);
3322     EVT VT = N->getValueType(0);
3323     SmallVector<SDValue, 6> Ops;
3324
3325     Ops.push_back(N->getOperand(0));
3326     Ops.push_back(N->getOperand(1));
3327     Ops.push_back(getAL(CurDAG, dl));                // Predicate
3328     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3329     return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3330   }
3331   case ARMISD::VTBL2: {
3332     SDLoc dl(N);
3333     EVT VT = N->getValueType(0);
3334
3335     // Form a REG_SEQUENCE to force register allocation.
3336     SDValue V0 = N->getOperand(0);
3337     SDValue V1 = N->getOperand(1);
3338     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3339
3340     SmallVector<SDValue, 6> Ops;
3341     Ops.push_back(RegSeq);
3342     Ops.push_back(N->getOperand(2));
3343     Ops.push_back(getAL(CurDAG, dl));                // Predicate
3344     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3345     return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3346   }
3347
3348   case ISD::CONCAT_VECTORS:
3349     return SelectConcatVector(N);
3350   }
3351
3352   return SelectCode(N);
3353 }
3354
3355 // Inspect a register string of the form
3356 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3357 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3358 // and obtain the integer operands from them, adding these operands to the
3359 // provided vector.
3360 static void getIntOperandsFromRegisterString(StringRef RegString,
3361                                              SelectionDAG *CurDAG, SDLoc DL,
3362                                              std::vector<SDValue>& Ops) {
3363   SmallVector<StringRef, 5> Fields;
3364   RegString.split(Fields, ":");
3365
3366   if (Fields.size() > 1) {
3367     bool AllIntFields = true;
3368
3369     for (StringRef Field : Fields) {
3370       // Need to trim out leading 'cp' characters and get the integer field.
3371       unsigned IntField;
3372       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3373       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3374     }
3375
3376     assert(AllIntFields &&
3377             "Unexpected non-integer value in special register string.");
3378   }
3379 }
3380
3381 // Maps a Banked Register string to its mask value. The mask value returned is
3382 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3383 // mask operand, which expresses which register is to be used, e.g. r8, and in
3384 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3385 // was invalid.
3386 static inline int getBankedRegisterMask(StringRef RegString) {
3387   return StringSwitch<int>(RegString.lower())
3388           .Case("r8_usr", 0x00)
3389           .Case("r9_usr", 0x01)
3390           .Case("r10_usr", 0x02)
3391           .Case("r11_usr", 0x03)
3392           .Case("r12_usr", 0x04)
3393           .Case("sp_usr", 0x05)
3394           .Case("lr_usr", 0x06)
3395           .Case("r8_fiq", 0x08)
3396           .Case("r9_fiq", 0x09)
3397           .Case("r10_fiq", 0x0a)
3398           .Case("r11_fiq", 0x0b)
3399           .Case("r12_fiq", 0x0c)
3400           .Case("sp_fiq", 0x0d)
3401           .Case("lr_fiq", 0x0e)
3402           .Case("lr_irq", 0x10)
3403           .Case("sp_irq", 0x11)
3404           .Case("lr_svc", 0x12)
3405           .Case("sp_svc", 0x13)
3406           .Case("lr_abt", 0x14)
3407           .Case("sp_abt", 0x15)
3408           .Case("lr_und", 0x16)
3409           .Case("sp_und", 0x17)
3410           .Case("lr_mon", 0x1c)
3411           .Case("sp_mon", 0x1d)
3412           .Case("elr_hyp", 0x1e)
3413           .Case("sp_hyp", 0x1f)
3414           .Case("spsr_fiq", 0x2e)
3415           .Case("spsr_irq", 0x30)
3416           .Case("spsr_svc", 0x32)
3417           .Case("spsr_abt", 0x34)
3418           .Case("spsr_und", 0x36)
3419           .Case("spsr_mon", 0x3c)
3420           .Case("spsr_hyp", 0x3e)
3421           .Default(-1);
3422 }
3423
3424 // Maps a MClass special register string to its value for use in the
3425 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3426 // Returns -1 to signify that the string was invalid.
3427 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3428   return StringSwitch<int>(RegString.lower())
3429           .Case("apsr", 0x0)
3430           .Case("iapsr", 0x1)
3431           .Case("eapsr", 0x2)
3432           .Case("xpsr", 0x3)
3433           .Case("ipsr", 0x5)
3434           .Case("epsr", 0x6)
3435           .Case("iepsr", 0x7)
3436           .Case("msp", 0x8)
3437           .Case("psp", 0x9)
3438           .Case("primask", 0x10)
3439           .Case("basepri", 0x11)
3440           .Case("basepri_max", 0x12)
3441           .Case("faultmask", 0x13)
3442           .Case("control", 0x14)
3443           .Default(-1);
3444 }
3445
3446 // The flags here are common to those allowed for apsr in the A class cores and
3447 // those allowed for the special registers in the M class cores. Returns a
3448 // value representing which flags were present, -1 if invalid.
3449 static inline int getMClassFlagsMask(StringRef Flags) {
3450   if (Flags.empty())
3451     return 0x3;
3452
3453   return StringSwitch<int>(Flags)
3454           .Case("g", 0x1)
3455           .Case("nzcvq", 0x2)
3456           .Case("nzcvqg", 0x3)
3457           .Default(-1);
3458 }
3459
3460 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3461                                  const ARMSubtarget *Subtarget) {
3462   // Ensure that the register (without flags) was a valid M Class special
3463   // register.
3464   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3465   if (SYSmvalue == -1)
3466     return -1;
3467
3468   // basepri, basepri_max and faultmask are only valid for V7m.
3469   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3470     return -1;
3471
3472   // If it was a read then we won't be expecting flags and so at this point
3473   // we can return the mask.
3474   if (IsRead) {
3475     assert (Flags.empty() && "Unexpected flags for reading M class register.");
3476     return SYSmvalue;
3477   }
3478
3479   // We know we are now handling a write so need to get the mask for the flags.
3480   int Mask = getMClassFlagsMask(Flags);
3481
3482   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3483   // shouldn't have flags present.
3484   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3485     return -1;
3486
3487   // The _g and _nzcvqg versions are only valid if the DSP extension is
3488   // available.
3489   if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
3490     return -1;
3491
3492   // The register was valid so need to put the mask in the correct place
3493   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3494   // construct the operand for the instruction node.
3495   if (SYSmvalue < 0x4)
3496     return SYSmvalue | Mask << 10;
3497
3498   return SYSmvalue;
3499 }
3500
3501 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3502   // The mask operand contains the special register (R Bit) in bit 4, whether
3503   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3504   // bits 3-0 contains the fields to be accessed in the special register, set by
3505   // the flags provided with the register.
3506   int Mask = 0;
3507   if (Reg == "apsr") {
3508     // The flags permitted for apsr are the same flags that are allowed in
3509     // M class registers. We get the flag value and then shift the flags into
3510     // the correct place to combine with the mask.
3511     Mask = getMClassFlagsMask(Flags);
3512     if (Mask == -1)
3513       return -1;
3514     return Mask << 2;
3515   }
3516
3517   if (Reg != "cpsr" && Reg != "spsr") {
3518     return -1;
3519   }
3520
3521   // This is the same as if the flags were "fc"
3522   if (Flags.empty() || Flags == "all")
3523     return Mask | 0x9;
3524
3525   // Inspect the supplied flags string and set the bits in the mask for
3526   // the relevant and valid flags allowed for cpsr and spsr.
3527   for (char Flag : Flags) {
3528     int FlagVal;
3529     switch (Flag) {
3530       case 'c':
3531         FlagVal = 0x1;
3532         break;
3533       case 'x':
3534         FlagVal = 0x2;
3535         break;
3536       case 's':
3537         FlagVal = 0x4;
3538         break;
3539       case 'f':
3540         FlagVal = 0x8;
3541         break;
3542       default:
3543         FlagVal = 0;
3544     }
3545
3546     // This avoids allowing strings where the same flag bit appears twice.
3547     if (!FlagVal || (Mask & FlagVal))
3548       return -1;
3549     Mask |= FlagVal;
3550   }
3551
3552   // If the register is spsr then we need to set the R bit.
3553   if (Reg == "spsr")
3554     Mask |= 0x10;
3555
3556   return Mask;
3557 }
3558
3559 // Lower the read_register intrinsic to ARM specific DAG nodes
3560 // using the supplied metadata string to select the instruction node to use
3561 // and the registers/masks to construct as operands for the node.
3562 SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
3563   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3564   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3565   bool IsThumb2 = Subtarget->isThumb2();
3566   SDLoc DL(N);
3567
3568   std::vector<SDValue> Ops;
3569   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3570
3571   if (!Ops.empty()) {
3572     // If the special register string was constructed of fields (as defined
3573     // in the ACLE) then need to lower to MRC node (32 bit) or
3574     // MRRC node(64 bit), we can make the distinction based on the number of
3575     // operands we have.
3576     unsigned Opcode;
3577     SmallVector<EVT, 3> ResTypes;
3578     if (Ops.size() == 5){
3579       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3580       ResTypes.append({ MVT::i32, MVT::Other });
3581     } else {
3582       assert(Ops.size() == 3 &&
3583               "Invalid number of fields in special register string.");
3584       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3585       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3586     }
3587
3588     Ops.push_back(getAL(CurDAG, DL));
3589     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3590     Ops.push_back(N->getOperand(0));
3591     return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
3592   }
3593
3594   std::string SpecialReg = RegString->getString().lower();
3595
3596   int BankedReg = getBankedRegisterMask(SpecialReg);
3597   if (BankedReg != -1) {
3598     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3599             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3600             N->getOperand(0) };
3601     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3602                                   DL, MVT::i32, MVT::Other, Ops);
3603   }
3604
3605   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3606   // corresponding to the register that is being read from. So we switch on the
3607   // string to find which opcode we need to use.
3608   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3609                     .Case("fpscr", ARM::VMRS)
3610                     .Case("fpexc", ARM::VMRS_FPEXC)
3611                     .Case("fpsid", ARM::VMRS_FPSID)
3612                     .Case("mvfr0", ARM::VMRS_MVFR0)
3613                     .Case("mvfr1", ARM::VMRS_MVFR1)
3614                     .Case("mvfr2", ARM::VMRS_MVFR2)
3615                     .Case("fpinst", ARM::VMRS_FPINST)
3616                     .Case("fpinst2", ARM::VMRS_FPINST2)
3617                     .Default(0);
3618
3619   // If an opcode was found then we can lower the read to a VFP instruction.
3620   if (Opcode) {
3621     if (!Subtarget->hasVFP2())
3622       return nullptr;
3623     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
3624       return nullptr;
3625
3626     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3627             N->getOperand(0) };
3628     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
3629   }
3630
3631   // If the target is M Class then need to validate that the register string
3632   // is an acceptable value, so check that a mask can be constructed from the
3633   // string.
3634   if (Subtarget->isMClass()) {
3635     int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
3636     if (SYSmValue == -1)
3637       return nullptr;
3638
3639     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3640                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3641                       N->getOperand(0) };
3642     return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
3643   }
3644
3645   // Here we know the target is not M Class so we need to check if it is one
3646   // of the remaining possible values which are apsr, cpsr or spsr.
3647   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
3648     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3649             N->getOperand(0) };
3650     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
3651                                   MVT::i32, MVT::Other, Ops);
3652   }
3653
3654   if (SpecialReg == "spsr") {
3655     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3656             N->getOperand(0) };
3657     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
3658                                   DL, MVT::i32, MVT::Other, Ops);
3659   }
3660
3661   return nullptr;
3662 }
3663
3664 // Lower the write_register intrinsic to ARM specific DAG nodes
3665 // using the supplied metadata string to select the instruction node to use
3666 // and the registers/masks to use in the nodes
3667 SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
3668   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3669   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3670   bool IsThumb2 = Subtarget->isThumb2();
3671   SDLoc DL(N);
3672
3673   std::vector<SDValue> Ops;
3674   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3675
3676   if (!Ops.empty()) {
3677     // If the special register string was constructed of fields (as defined
3678     // in the ACLE) then need to lower to MCR node (32 bit) or
3679     // MCRR node(64 bit), we can make the distinction based on the number of
3680     // operands we have.
3681     unsigned Opcode;
3682     if (Ops.size() == 5) {
3683       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
3684       Ops.insert(Ops.begin()+2, N->getOperand(2));
3685     } else {
3686       assert(Ops.size() == 3 &&
3687               "Invalid number of fields in special register string.");
3688       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
3689       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
3690       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
3691     }
3692
3693     Ops.push_back(getAL(CurDAG, DL));
3694     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3695     Ops.push_back(N->getOperand(0));
3696
3697     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3698   }
3699
3700   std::string SpecialReg = RegString->getString().lower();
3701   int BankedReg = getBankedRegisterMask(SpecialReg);
3702   if (BankedReg != -1) {
3703     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
3704             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3705             N->getOperand(0) };
3706     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
3707                                   DL, MVT::Other, Ops);
3708   }
3709
3710   // The VFP registers are written to by creating SelectionDAG nodes with
3711   // opcodes corresponding to the register that is being written. So we switch
3712   // on the string to find which opcode we need to use.
3713   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3714                     .Case("fpscr", ARM::VMSR)
3715                     .Case("fpexc", ARM::VMSR_FPEXC)
3716                     .Case("fpsid", ARM::VMSR_FPSID)
3717                     .Case("fpinst", ARM::VMSR_FPINST)
3718                     .Case("fpinst2", ARM::VMSR_FPINST2)
3719                     .Default(0);
3720
3721   if (Opcode) {
3722     if (!Subtarget->hasVFP2())
3723       return nullptr;
3724     Ops = { N->getOperand(2), getAL(CurDAG, DL),
3725             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3726     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3727   }
3728
3729   SmallVector<StringRef, 5> Fields;
3730   StringRef(SpecialReg).split(Fields, "_", 1, false);
3731   std::string Reg = Fields[0].str();
3732   StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
3733
3734   // If the target was M Class then need to validate the special register value
3735   // and retrieve the mask for use in the instruction node.
3736   if (Subtarget->isMClass()) {
3737     // basepri_max gets split so need to correct Reg and Flags.
3738     if (SpecialReg == "basepri_max") {
3739       Reg = SpecialReg;
3740       Flags = "";
3741     }
3742     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
3743     if (SYSmValue == -1)
3744       return nullptr;
3745
3746     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3747                       N->getOperand(2), getAL(CurDAG, DL),
3748                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3749     return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
3750   }
3751
3752   // We then check to see if a valid mask can be constructed for one of the
3753   // register string values permitted for the A and R class cores. These values
3754   // are apsr, spsr and cpsr; these are also valid on older cores.
3755   int Mask = getARClassRegisterMask(Reg, Flags);
3756   if (Mask != -1) {
3757     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
3758             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3759             N->getOperand(0) };
3760     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
3761                                   DL, MVT::Other, Ops);
3762   }
3763
3764   return nullptr;
3765 }
3766
3767 SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
3768   std::vector<SDValue> AsmNodeOperands;
3769   unsigned Flag, Kind;
3770   bool Changed = false;
3771   unsigned NumOps = N->getNumOperands();
3772
3773   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
3774   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
3775   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
3776   // respectively. Since there is no constraint to explicitly specify a
3777   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
3778   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
3779   // them into a GPRPair.
3780
3781   SDLoc dl(N);
3782   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
3783                                    : SDValue(nullptr,0);
3784
3785   SmallVector<bool, 8> OpChanged;
3786   // Glue node will be appended late.
3787   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
3788     SDValue op = N->getOperand(i);
3789     AsmNodeOperands.push_back(op);
3790
3791     if (i < InlineAsm::Op_FirstOperand)
3792       continue;
3793
3794     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
3795       Flag = C->getZExtValue();
3796       Kind = InlineAsm::getKind(Flag);
3797     }
3798     else
3799       continue;
3800
3801     // Immediate operands to inline asm in the SelectionDAG are modeled with
3802     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
3803     // the second is a constant with the value of the immediate. If we get here
3804     // and we have a Kind_Imm, skip the next operand, and continue.
3805     if (Kind == InlineAsm::Kind_Imm) {
3806       SDValue op = N->getOperand(++i);
3807       AsmNodeOperands.push_back(op);
3808       continue;
3809     }
3810
3811     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
3812     if (NumRegs)
3813       OpChanged.push_back(false);
3814
3815     unsigned DefIdx = 0;
3816     bool IsTiedToChangedOp = false;
3817     // If it's a use that is tied with a previous def, it has no
3818     // reg class constraint.
3819     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
3820       IsTiedToChangedOp = OpChanged[DefIdx];
3821
3822     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
3823         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
3824       continue;
3825
3826     unsigned RC;
3827     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
3828     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
3829         || NumRegs != 2)
3830       continue;
3831
3832     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
3833     SDValue V0 = N->getOperand(i+1);
3834     SDValue V1 = N->getOperand(i+2);
3835     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
3836     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
3837     SDValue PairedReg;
3838     MachineRegisterInfo &MRI = MF->getRegInfo();
3839
3840     if (Kind == InlineAsm::Kind_RegDef ||
3841         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
3842       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
3843       // the original GPRs.
3844
3845       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3846       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3847       SDValue Chain = SDValue(N,0);
3848
3849       SDNode *GU = N->getGluedUser();
3850       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
3851                                                Chain.getValue(1));
3852
3853       // Extract values from a GPRPair reg and copy to the original GPR reg.
3854       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3855                                                     RegCopy);
3856       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3857                                                     RegCopy);
3858       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
3859                                         RegCopy.getValue(1));
3860       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
3861
3862       // Update the original glue user.
3863       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
3864       Ops.push_back(T1.getValue(1));
3865       CurDAG->UpdateNodeOperands(GU, Ops);
3866     }
3867     else {
3868       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
3869       // GPRPair and then pass the GPRPair to the inline asm.
3870       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
3871
3872       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
3873       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
3874                                           Chain.getValue(1));
3875       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
3876                                           T0.getValue(1));
3877       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
3878
3879       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
3880       // i32 VRs of inline asm with it.
3881       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3882       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3883       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
3884
3885       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
3886       Glue = Chain.getValue(1);
3887     }
3888
3889     Changed = true;
3890
3891     if(PairedReg.getNode()) {
3892       OpChanged[OpChanged.size() -1 ] = true;
3893       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
3894       if (IsTiedToChangedOp)
3895         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
3896       else
3897         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
3898       // Replace the current flag.
3899       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
3900           Flag, dl, MVT::i32);
3901       // Add the new register node and skip the original two GPRs.
3902       AsmNodeOperands.push_back(PairedReg);
3903       // Skip the next two GPRs.
3904       i += 2;
3905     }
3906   }
3907
3908   if (Glue.getNode())
3909     AsmNodeOperands.push_back(Glue);
3910   if (!Changed)
3911     return nullptr;
3912
3913   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
3914       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
3915   New->setNodeId(-1);
3916   return New.getNode();
3917 }
3918
3919
3920 bool ARMDAGToDAGISel::
3921 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3922                              std::vector<SDValue> &OutOps) {
3923   assert(ConstraintID == InlineAsm::Constraint_m &&
3924          "unexpected asm memory constraint");
3925   // Require the address to be in a register.  That is safe for all ARM
3926   // variants and it is hard to do anything much smarter without knowing
3927   // how the operand is used.
3928   OutOps.push_back(Op);
3929   return false;
3930 }
3931
3932 /// createARMISelDag - This pass converts a legalized DAG into a
3933 /// ARM-specific DAG, ready for instruction scheduling.
3934 ///
3935 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3936                                      CodeGenOpt::Level OptLevel) {
3937   return new ARMDAGToDAGISel(TM, OptLevel);
3938 }