//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

namespace {
class AArch64FastISel : public FastISel {

  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    union {
      unsigned Reg;
      int FI;
    } Base;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
    bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool FastLowerCall(CallLoweringInfo &CLI) override;
  bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectSelect(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectFPToInt(const Instruction *I, bool Signed);
  bool SelectIntToFP(const Instruction *I, bool Signed);
  bool SelectRem(const Instruction *I, unsigned ISDOpcode);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectMul(const Instruction *I);
  bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
  bool SelectBitCast(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
  bool ComputeAddress(const Value *Obj, Address &Addr);
  bool ComputeCallAddress(const Value *V, Address &Addr);
  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
                       bool UseUnscaled);
  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, bool UseUnscaled);
  bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
  bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);

  bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
  bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                bool UseUnscaled = false);
  bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
                 bool UseUnscaled = false);
  unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                       unsigned Op1, bool Op1IsKill);
  unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                         unsigned Op1, bool Op1IsKill);
  unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                         unsigned Op1, bool Op1IsKill);
  unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
  unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
  unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);

  unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned AArch64MaterializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
  unsigned TargetMaterializeConstant(const Constant *C) override;

  explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    Context = &funcInfo.Fn->getContext();
  }

  bool TargetSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool is64bit = (VT == MVT::f64);

  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = AArch64_AM::getFP64Imm(Val);
      Opc = AArch64::FMOVDi;
    } else {
      Imm = AArch64_AM::getFP32Imm(Val);
      Opc = AArch64::FMOVSi;
    }
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
        .addImm(Imm);
    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);

  unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

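// Note (illustrative): FMOV (scalar, immediate) can only encode an 8-bit
// "imm8" constant of the form (-1)^s * (1 + m/16) * 2^e with a 4-bit mantissa
// and a small exponent range, e.g. 1.0, 0.5, or -2.0. Anything else falls
// back to the ADRP + load constant-pool sequence above, which assembles to
// roughly:
//   adrp xN, .LCPI0_0               ; 4KB page address of the literal
//   ldr  d0, [xN, :lo12:.LCPI0_0]   ; low 12 bits of the literal's address
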
unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC);
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
  }
  return ResultReg;
}

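// Note (illustrative): the two paths above correspond to, in MachO-style
// assembly,
//   adrp x8, _g@GOTPAGE
//   ldr  x8, [x8, _g@GOTPAGEOFF]   ; address loaded from the GOT slot
// for GOT-indirect references, versus
//   adrp x8, _g@PAGE
//   add  x8, x8, _g@PAGEOFF        ; page base plus low 12 bits
// when the definition can be referenced directly.
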
unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  // FIXME: Handle ConstantInt.
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return AArch64MaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return AArch64MaterializeGV(GV);

  return 0;
}

// Computes the address to get to an object.
bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return ComputeAddress(U->getOperand(0), Addr);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return ComputeAddress(U->getOperand(0), Addr);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return ComputeAddress(U->getOperand(0), Addr);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
         ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported.
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (ComputeAddress(U->getOperand(0), Addr))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  }

  // Try to get this in a register if nothing else has worked.
  if (!Addr.isValid())
    Addr.setReg(getRegForValue(Obj));
  return Addr.isValid();
}

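// Note (illustrative): for IR such as
//   %p = getelementptr inbounds %struct.S* %s, i64 0, i32 2
// the loop above folds the struct-layout offset of field 2 into the address's
// constant offset, so the eventual load/store can use reg+imm addressing
// instead of materializing an intermediate pointer.
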
bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return ComputeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return ComputeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return ComputeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now. For stores, this reflects truncation.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
                                      int64_t ScaleFactor, bool UseUnscaled) {
  bool needsLowering = false;
  int64_t Offset = Addr.getOffset();
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
    if (!UseUnscaled)
      // Using scaled, 12-bit, unsigned immediate offsets.
      needsLowering = ((Offset & 0xfff) != Offset);
    else
      // Using unscaled, 9-bit, signed immediate offsets.
      needsLowering = (Offset > 256 || Offset < -256);
    break;
  }

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (needsLowering) {
    uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
    unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
                                      UnscaledOffset, MVT::i64);
    if (ResultReg == 0)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

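// Note (illustrative): the scaled unsigned form (e.g. LDRWui) can address
// offsets of [0, 4095] * sizeof(element), while the unscaled LDUR/STUR forms
// take a signed 9-bit byte offset in [-256, 255]. For example, an i32 access
// at byte offset 6 is not 4-byte aligned and one at -8 is negative, so both
// use the unscaled form; byte offset 16 scales to immediate 4 and uses LDRWui.
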
void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags, bool UseUnscaled) {
  int64_t Offset = Addr.getOffset();
  // Frame base works a bit differently. Handle it separately.
  if (Addr.getKind() == Address::FrameIndexBase) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.getReg());
    MIB.addImm(Offset);
  }
}

bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                               bool UseUnscaled) {
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  if (!UseUnscaled && Addr.getOffset() < 0)
    UseUnscaled = true;

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool VTIsi1 = false;
  int64_t ScaleFactor = 0;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
    VTIsi1 = true;
    // Intentional fall-through.
  case MVT::i8:
    Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
    RC = &AArch64::GPR32RegClass;
    ScaleFactor = 1;
    break;
  case MVT::i16:
    Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
    RC = &AArch64::GPR32RegClass;
    ScaleFactor = 2;
    break;
  case MVT::i32:
    Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
    RC = &AArch64::GPR32RegClass;
    ScaleFactor = 4;
    break;
  case MVT::i64:
    Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
    RC = &AArch64::GPR64RegClass;
    ScaleFactor = 8;
    break;
  case MVT::f32:
    Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
    RC = TLI.getRegClassFor(VT);
    ScaleFactor = 4;
    break;
  case MVT::f64:
    Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
    RC = TLI.getRegClassFor(VT);
    ScaleFactor = 8;
    break;
  }

  // Scale the offset.
  if (!UseUnscaled) {
    int64_t Offset = Addr.getOffset();
    if (Offset & (ScaleFactor - 1))
      // Retry using an unscaled, 9-bit, signed immediate offset.
      return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
    Addr.setOffset(Offset / ScaleFactor);
  }

  // Simplify this down to something we can handle.
  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
    return false;

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);

  // Loading an i1 requires special handling.
  if (VTIsi1) {
    MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(ResultReg)
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    ResultReg = ANDReg;
  }
  return true;
}

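// Note (illustrative): an i1 occupies a full byte in memory, so after the
// LDRB the result is masked back down with "and wD, wS, #0x1" to guarantee
// that only bit 0 of the register is significant.
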
bool AArch64FastISel::SelectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ComputeAddress(I->getOperand(0), Addr))
    return false;

  unsigned ResultReg;
  if (!EmitLoad(VT, ResultReg, Addr))
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
                                bool UseUnscaled) {
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  if (!UseUnscaled && Addr.getOffset() < 0)
    UseUnscaled = true;

  unsigned StrOpc;
  bool VTIsi1 = false;
  int64_t ScaleFactor = 0;
  // Using scaled, 12-bit, unsigned immediate offsets.
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
    VTIsi1 = true;
  case MVT::i8:
    StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
    ScaleFactor = 1;
    break;
  case MVT::i16:
    StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
    ScaleFactor = 2;
    break;
  case MVT::i32:
    StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
    ScaleFactor = 4;
    break;
  case MVT::i64:
    StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
    ScaleFactor = 8;
    break;
  case MVT::f32:
    StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
    ScaleFactor = 4;
    break;
  case MVT::f64:
    StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
    ScaleFactor = 8;
    break;
  }

  // Scale the offset.
  if (!UseUnscaled) {
    int64_t Offset = Addr.getOffset();
    if (Offset & (ScaleFactor - 1))
      // Retry using an unscaled, 9-bit, signed immediate offset.
      return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
    Addr.setOffset(Offset / ScaleFactor);
  }

  // Simplify this down to something we can handle.
  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
    return false;

  // Storing an i1 requires special handling.
  if (VTIsi1) {
    MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(SrcReg)
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(StrOpc)).addReg(SrcReg);
  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
  return true;
}

bool AArch64FastISel::SelectStore(const Instruction *I) {
  MVT VT;
  Value *Op0 = I->getOperand(0);
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
      cast<StoreInst>(I)->isAtomic())
    return false;

  // Get the value to be stored into a register.
  unsigned SrcReg = getRegForValue(Op0);
  if (SrcReg == 0)
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!EmitStore(VT, SrcReg, Addr))
    return false;
  return true;
}

static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    // AL is our "false" for now. The other two need more compares.
    return AArch64CC::AL;
  case CmpInst::ICMP_EQ:
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_UGT:
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::ICMP_ULE:
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::ICMP_SLT:
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::FCMP_UNE:
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  }
}

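// Note (illustrative): FCMP_ONE ("ordered and not equal") and FCMP_UEQ
// ("unordered or equal") each cover a flag combination that no single AArch64
// condition code expresses, so they would need two conditional branches.
// Returning AL makes the callers treat them as unsupported and fall back to
// SelectionDAG.
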
bool AArch64FastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  AArch64CC::CondCode CC = AArch64CC::NE;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // We may not handle every CC for now.
      CC = getCompareCC(CI->getPredicate());
      if (CC == AArch64CC::AL)
        return false;

      // Emit the cmp.
      if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);
      FuncInfo.MBB->addSuccessor(TBB);

      FastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SrcVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
      unsigned CondReg = getRegForValue(TI->getOperand(0));
      if (CondReg == 0)
        return false;

      // Issue an extract_subreg to get the lower 32-bits.
      if (SrcVT == MVT::i64)
        CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
                                             AArch64::sub_32);

      MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
      unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::ANDWri), ANDReg)
          .addReg(CondReg)
          .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBSWri))
          .addReg(AArch64::WZR, RegState::Define)
          .addReg(ANDReg)
          .addImm(0)
          .addImm(0);

      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CC = AArch64CC::EQ;
      }
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);
      FuncInfo.MBB->addSuccessor(TBB);
      FastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
        .addMBB(Target);
    FuncInfo.MBB->addSuccessor(Target);
    return true;
  } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned CondReg = getRegForValue(BI->getCondition());
    if (!CondReg)
      return false;

    // Emit the branch.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
        .addImm(CC)
        .addMBB(TBB);
    FuncInfo.MBB->addSuccessor(TBB);

    FastEmitBranch(FBB, DbgLoc);
    return true;
  }

  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0)
    return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
          AArch64::WZR)
      .addReg(CondReg)
      .addImm(0)
      .addImm(0);

  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CC = AArch64CC::EQ;
  }

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
      .addImm(CC)
      .addMBB(TBB);
  FuncInfo.MBB->addSuccessor(TBB);
  FastEmitBranch(FBB, DbgLoc);
  return true;
}

bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
  unsigned AddrReg = getRegForValue(BI->getOperand(0));
  if (AddrReg == 0)
    return false;

  // Emit the indirect branch.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
      .addReg(AddrReg);

  // Make sure the CFG is up-to-date.
  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);

  return true;
}

bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  uint64_t Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();

      Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
      if (CIVal.isNegative()) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      // FIXME: We can handle more immediates using shifts.
      UseImm = ((Imm & 0xfff) == Imm);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned ZReg = 0;
  unsigned CmpOpc = 0;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    needsExt = true;
    // Intentional fall-through.
  case MVT::i32:
    ZReg = AArch64::WZR;
    if (UseImm)
      CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
    else
      CmpOpc = AArch64::SUBSWrr;
    break;
  case MVT::i64:
    ZReg = AArch64::XZR;
    if (UseImm)
      CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
    else
      CmpOpc = AArch64::SUBSXrr;
    break;
  case MVT::f32:
    isICmp = false;
    CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
    break;
  case MVT::f64:
    isICmp = false;
    CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
    break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0)
      return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0)
      return false;
    if (!UseImm) {
      SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0)
        return false;
    }
  }

  if (isICmp) {
    if (UseImm)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(ZReg, RegState::Define)
          .addReg(SrcReg1)
          .addImm(Imm)
          .addImm(0);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(ZReg, RegState::Define)
          .addReg(SrcReg1)
          .addReg(SrcReg2);
  } else {
    if (UseImm)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(SrcReg1);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(SrcReg1)
          .addReg(SrcReg2);
  }
  return true;
}

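// Note (illustrative): integer compares are flag-setting subtractions into
// the zero register (SUBS wzr, ..., i.e. CMP). The ADDS/SUBS immediate field
// is an unsigned 12-bit value, so a negative constant is negated first and
// emitted with ADDS instead: "cmp w0, #-5" becomes "cmn w0, #5".
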
bool AArch64FastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // We may not handle every CC for now.
  AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
  if (CC == AArch64CC::AL)
    return false;

  // Emit the cmp.
  if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  // Now set a register based on the comparison.
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR)
      .addReg(AArch64::WZR)
      .addImm(invertedCC);

  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectSelect(const Instruction *I) {
  const SelectInst *SI = cast<SelectInst>(I);

  EVT DestEVT = TLI.getValueType(SI->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
      DestVT != MVT::f64)
    return false;

  unsigned SelectOpc;
  switch (DestVT.SimpleTy) {
  default: return false;
  case MVT::i32: SelectOpc = AArch64::CSELWr; break;
  case MVT::i64: SelectOpc = AArch64::CSELXr; break;
  case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
  case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
  }

  const Value *Cond = SI->getCondition();
  bool NeedTest = true;
  AArch64CC::CondCode CC = AArch64CC::NE;
  if (foldXALUIntrinsic(CC, I, Cond))
    NeedTest = false;

  unsigned CondReg = getRegForValue(Cond);
  if (!CondReg)
    return false;
  bool CondIsKill = hasTrivialKill(Cond);

  if (NeedTest) {
    MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
        .addReg(AArch64::WZR, RegState::Define)
        .addReg(ANDReg)
        .addImm(0)
        .addImm(0);
  }

  unsigned TrueReg = getRegForValue(SI->getTrueValue());
  bool TrueIsKill = hasTrivialKill(SI->getTrueValue());

  unsigned FalseReg = getRegForValue(SI->getFalseValue());
  bool FalseIsKill = hasTrivialKill(SI->getFalseValue());

  if (!TrueReg || !FalseReg)
    return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
          ResultReg)
      .addReg(TrueReg, getKillRegState(TrueIsKill))
      .addReg(FalseReg, getKillRegState(FalseIsKill))
      .addImm(CC);

  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectFPExt(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
          ResultReg).addReg(Op);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
          ResultReg).addReg(Op);
  UpdateValueMap(I, ResultReg);
  return true;
}

// FPToUI and FPToSI
bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
  if (SrcVT == MVT::f128)
    return false;

  unsigned Opc;
  if (SrcVT == MVT::f64) {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
  }
  unsigned ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    SrcReg =
        EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    if (SrcReg == 0)
      return false;
  }

  MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
                                                  : &AArch64::GPR32RegClass);

  unsigned Opc;
  if (SrcVT == MVT::i64) {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
      .addImm(NumBytes);

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
      // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      if (!EmitStore(ArgVT, ArgReg, Addr))
        return false;
    }
  }
  return true;
}

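// Note (illustrative): stack arguments are addressed SP-relative inside the
// CALLSEQ_START/CALLSEQ_END bracket. On big-endian targets, a sub-8-byte
// stack argument is stored at offset + (8 - size) within its 8-byte slot,
// which is the "8 - ArgSize" adjustment above, so the callee finds the value
// where the AAPCS expects it.
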
bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
                                 unsigned NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
      .addImm(NumBytes).addImm(0);

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));

    // Only handle a single return value.
    if (RVLocs.size() != 1)
      return false;

    // Copy all of the result registers out of their specified physreg.
    MVT CopyVT = RVLocs[0].getValVT();
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(RVLocs[0].getLocReg());
    CLI.InRegs.push_back(RVLocs[0].getLocReg());

    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}

bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const char *SymName = CLI.SymName;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small and large code model.
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (!ComputeCallAddress(Callee, Addr))
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CM == CodeModel::Small) {
    unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
    if (SymName)
      MIB.addExternalSymbol(SymName, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg())
      MIB.addReg(Addr.getReg());
    else
      return false;
  } else {
    unsigned CallReg = 0;
    if (SymName) {
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
              ADRPReg)
          .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
              CallReg)
          .addReg(ADRPReg)
          .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                             AArch64II::MO_NC);
    } else if (Addr.getGlobalValue()) {
      CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
    } else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(AArch64::BLR)).addReg(CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return FinishCall(CLI, RetVT, NumBytes);
}

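// Note (illustrative): in the small code model the call is a single
// pc-relative "bl callee" (roughly +/-128MB of range), or "blr xN" for an
// indirect call. In the large code model on MachO the callee's address is
// first loaded from the GOT (adrp + ldr) and the call always becomes
// "blr xN".
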
bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
  if (Alignment)
    return Len / Alignment <= 4;
  else
    return Len < 32;
}

bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemCpySmall(Len, Alignment))
    return false;

  int64_t UnscaledOffset = 0;
  Address OrigDest = Dest;
  Address OrigSrc = Src;

  while (Len) {
    MVT VT;
    if (!Alignment || Alignment >= 8) {
      if (Len >= 8)
        VT = MVT::i64;
      else if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else
        VT = MVT::i8;
    } else {
      // Bound based on alignment.
      if (Len >= 4 && Alignment == 4)
        VT = MVT::i32;
      else if (Len >= 2 && Alignment == 2)
        VT = MVT::i16;
      else
        VT = MVT::i8;
    }

    bool RV;
    unsigned ResultReg;
    RV = EmitLoad(VT, ResultReg, Src);
    if (!RV)
      return false;

    RV = EmitStore(VT, ResultReg, Dest);
    if (!RV)
      return false;

    int64_t Size = VT.getSizeInBits() / 8;
    Len -= Size;
    UnscaledOffset += Size;

    // We need to recompute the unscaled offset for each iteration.
    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
  }

  return true;
}

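// Note (illustrative): a 16-byte, 8-byte-aligned memcpy is inlined as two
// i64 load/store pairs, while a 7-byte copy with 4-byte alignment degrades
// to i32 + i16 + i8 chunks. IsMemCpySmall caps this at roughly four chunks
// so inlining never bloats code compared with a libcall.
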
/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  AArch64CC::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
  case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
  case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way.
  BasicBlock::const_iterator Start = I;
  BasicBlock::const_iterator End = II;
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}

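// Note (illustrative): this folds IR of the form
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov  = extractvalue { i32, i1 } %res, 1
//   br i1 %ov, label %overflow, label %cont
// so the branch (or select) can test the NZCV flags set by the flag-setting
// arithmetic directly (here B.vs) instead of first materializing the i1 into
// a register.
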
bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
    MFI->setFrameAddressIsTaken(true);

    const AArch64RegisterInfo *RegInfo =
        static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::LDRXui), DestReg)
          .addReg(SrcReg).addImm(0);
      SrcReg = DestReg;
    }

    UpdateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a
      // call if possible.
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      unsigned Alignment = MTI->getAlignment();
      if (IsMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!ComputeAddress(MTI->getRawDest(), Dest) ||
            !ComputeAddress(MTI->getRawSource(), Src))
          return false;
        if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    if (!MTI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  }
  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));

    unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    if (!ResultReg)
      return false;

    UpdateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);
    Type *CondTy = Ty->getTypeAtIndex(1);

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
        isCommutativeIntrinsic(II))
      std::swap(LHS, RHS);

    unsigned LHSReg = getRegForValue(LHS);
    if (!LHSReg)
      return false;
    bool LHSIsKill = hasTrivialKill(LHS);

    unsigned RHSReg = 0;
    bool RHSIsKill = false;
    if (!isa<ConstantInt>(RHS)) {
      RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      RHSIsKill = hasTrivialKill(RHS);
    }

    unsigned Opc = 0;
    unsigned MulReg = 0;
    AArch64CC::CondCode CC = AArch64CC::Invalid;
    bool Is64Bit = VT == MVT::i64;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      if (!RHSReg)
        Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
      else
        Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      if (!RHSReg)
        Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
      else
        Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      if (!RHSReg)
        Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
      else
        Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      if (!RHSReg)
        Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
      else
        Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      // MUL has no immediate form, so materialize a constant RHS if needed.
      if (!RHSReg)
        RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      RHSIsKill = hasTrivialKill(RHS);

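      // Note (illustrative): for i32, SMULL produces the full 64-bit signed
      // product. The multiply overflowed iff the high 32 bits differ from the
      // sign-extension of the low 32 bits, which the flag-setting
      // "subs wzr, hi, lo, asr #31" below checks (the shifted-register
      // immediate 159 encodes "asr #31").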
      if (VT == MVT::i32) {
        MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
        MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
        ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
                                              AArch64::sub_32);
        unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBSWrs), CmpReg)
            .addReg(ShiftReg, getKillRegState(true))
            .addReg(MulReg, getKillRegState(false))
            .addImm(159); // 159 <-> asr #31
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBSXrs), CmpReg)
            .addReg(SMULHReg, getKillRegState(true))
            .addReg(MulReg, getKillRegState(false))
            .addImm(191); // 191 <-> asr #63
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      // MUL has no immediate form, so materialize a constant RHS if needed.
      if (!RHSReg)
        RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      RHSIsKill = hasTrivialKill(RHS);

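      // Note (illustrative): for i32, UMULL produces the full 64-bit unsigned
      // product; it overflowed iff any of the high 32 bits are set, which
      // "subs xzr, xzr, mul, lsr #32" below checks (96 encodes "lsr #32").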
      if (VT == MVT::i32) {
        MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBSXrs), CmpReg)
            .addReg(AArch64::XZR, getKillRegState(true))
            .addReg(MulReg, getKillRegState(false))
            .addImm(96); // 96 <-> lsr #32
        MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBSXrr), CmpReg)
            .addReg(AArch64::XZR, getKillRegState(true))
            .addReg(UMULHReg, getKillRegState(false));
      }
      break;
    }
    }

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (Opc) {
      MachineInstrBuilder MIB;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                    ResultReg)
                .addReg(LHSReg, getKillRegState(LHSIsKill));
      if (!RHSReg) {
        MIB.addImm(cast<ConstantInt>(RHS)->getZExtValue());
        MIB.addImm(0); // No shift on the immediate.
      } else
        MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
    } else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(MulReg);

    unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
    assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            ResultReg2)
        .addReg(AArch64::WZR, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(getInvertedCondCode(CC));

    UpdateValueMap(II, ResultReg, 2);
    return true;
  }
  }
  return false;
}

bool AArch64FastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
                   I->getContext());
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
                                                     : RetCC_AArch64_AAPCS;
    CCInfo.AnalyzeReturn(Outs, RetCC);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool isZExt = Outs[0].Flags.isZExt();
      SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
      if (SrcReg == 0)
        return false;
    }

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

bool AArch64FastISel::SelectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(SrcTy, true);
  EVT DestEVT = TLI.getValueType(DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
  // generate any code.
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
                                                AArch64::sub_32);
    MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
    // Create the AND instruction which performs the actual truncation.
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(Reg32)
        .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
    SrcReg = ANDReg;
  }

  UpdateValueMap(I, SrcReg);
  return true;
}

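// Note (illustrative): "trunc i64 %x to i8" therefore becomes an
// EXTRACT_SUBREG of the low W register followed by "and wD, wS, #0xff",
// while truncations between 32-bit-or-smaller types are free: the value map
// simply reuses the source register and the high bits stay undefined.
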
unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (isZExt) {
    MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ResultReg)
        .addReg(SrcReg)
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
            ResultReg)
        .addReg(SrcReg)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }
}

unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned Opc, ZReg;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
  case MVT::i64:
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
  }

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill))
      .addReg(Op1, getKillRegState(Op1IsKill))
      .addReg(ZReg, getKillRegState(true));

  return ResultReg;
}

unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                        unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
          ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill))
      .addReg(Op1, getKillRegState(Op1IsKill))
      .addReg(AArch64::XZR, getKillRegState(true));
  return ResultReg;
}
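
// Emit_UMULL_rr - Emit an unsigned 32x32->64-bit multiply via UMADDL with
// XZR as the addend.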
unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                        unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
          ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill))
      .addReg(Op1, getKillRegState(Op1IsKill))
      .addReg(AArch64::XZR, getKillRegState(true));
  return ResultReg;
}
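
// Emit_LSL_ri - Emit a logical shift left by immediate. LSL is an alias of
// UBFM with ImmR = -Shift mod size and ImmS = size - 1 - Shift.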
unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      uint64_t Shift) {
  unsigned Opc, ImmR, ImmS;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
  case MVT::i64:
    Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
  }
  return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
                          ImmS);
}
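
// Emit_LSR_ri - Emit a logical shift right by immediate. LSR is an alias of
// UBFM with ImmR = Shift and ImmS = size - 1.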
unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      uint64_t Shift) {
  unsigned Opc, ImmS;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::UBFMWri; ImmS = 31; break;
  case MVT::i64:
    Opc = AArch64::UBFMXri; ImmS = 63; break;
  }
  return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
                          ImmS);
}
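
// Emit_ASR_ri - Emit an arithmetic shift right by immediate. ASR is an alias
// of SBFM with ImmR = Shift and ImmS = size - 1.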
unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      uint64_t Shift) {
  unsigned Opc, ImmS;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::SBFMWri; ImmS = 31; break;
  case MVT::i64:
    Opc = AArch64::SBFMXri; ImmS = 63; break;
  }
  return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
                          ImmS);
}
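
// EmitIntExt - Emit an integer extension as a single UBFM/SBFM bitfield move,
// widening the source with SUBREG_TO_REG first when the destination is i64.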
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we
  // can handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT),
  // otherwise bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return Emiti1Ext(SrcReg, DestVT, isZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg)
      .addImm(0)
      .addImm(Imm);
  return ResultReg;
}
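
// SelectIntExt - Select a zext or sext instruction by delegating to
// EmitIntExt.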
bool AArch64FastISel::SelectIntExt(const Instruction *I) {
  // On AArch64, in general, integer casts don't involve legal types; this code
  // handles promotable integers. The high bits for a type smaller than
  // the register size are assumed to be undefined.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  EVT SrcEVT = TLI.getValueType(SrcTy, true);
  EVT DestEVT = TLI.getValueType(DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();
  unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0)
    return false;
  UpdateValueMap(I, ResultReg);
  return true;
}
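
// SelectRem - Select an integer remainder. AArch64 has no remainder
// instruction, so emit a divide followed by an MSUB that computes
// numerator - quotient * denominator.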
bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg);
  // The remainder is computed as numerator - (quotient * denominator) using
  // the MSUB instruction.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
      .addReg(QuotReg)
      .addReg(Src1Reg)
      .addReg(Src0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}
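
// SelectMul - Select an integer multiply of a simple scalar type; vector
// multiplies are left to SelectionDAG.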
bool AArch64FastISel::SelectMul(const Instruction *I) {
  EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // Must be a simple value type. Don't handle vectors.
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;

  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg =
      Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
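
// SelectShift - Select a shift by a constant amount; variable shift amounts
// are left to SelectionDAG.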
bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
                                  bool IsArithmetic) {
  EVT RetEVT = TLI.getValueType(I->getType(), true);
  if (!RetEVT.isSimple())
    return false;
  MVT RetVT = RetEVT.getSimpleVT();

  if (!isa<ConstantInt>(I->getOperand(1)))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();

  unsigned ResultReg;
  if (IsLeftShift)
    ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
  else if (IsArithmetic)
    ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
  else
    ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);

  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
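
// SelectBitCast - Select a bitcast between an integer and a floating-point
// type of the same width, which is a single FMOV between GPR and FPR.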
bool AArch64FastISel::SelectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
  unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
                                      Op0Reg, Op0IsKill);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
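
// TargetSelectInstruction - Dispatch to the AArch64-specific selection
// routine for this instruction; returning false falls back to the
// target-independent selector.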
bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Load:
    return SelectLoad(I);
  case Instruction::Store:
    return SelectStore(I);
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::IndirectBr:
    return SelectIndirectBr(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return SelectCmp(I);
  case Instruction::Select:
    return SelectSelect(I);
  case Instruction::FPExt:
    return SelectFPExt(I);
  case Instruction::FPTrunc:
    return SelectFPTrunc(I);
  case Instruction::FPToSI:
    return SelectFPToInt(I, /*Signed=*/true);
  case Instruction::FPToUI:
    return SelectFPToInt(I, /*Signed=*/false);
  case Instruction::SIToFP:
    return SelectIntToFP(I, /*Signed=*/true);
  case Instruction::UIToFP:
    return SelectIntToFP(I, /*Signed=*/false);
  case Instruction::SRem:
    return SelectRem(I, ISD::SREM);
  case Instruction::URem:
    return SelectRem(I, ISD::UREM);
  case Instruction::Ret:
    return SelectRet(I);
  case Instruction::Trunc:
    return SelectTrunc(I);
  case Instruction::ZExt:
  case Instruction::SExt:
    return SelectIntExt(I);

  // FIXME: All of these should really be handled by the target-independent
  // selector -> improve FastISel tblgen.
  case Instruction::Mul:
    return SelectMul(I);
  case Instruction::Shl:
    return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
  case Instruction::LShr:
    return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
  case Instruction::AShr:
    return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
  case Instruction::BitCast:
    return SelectBitCast(I);
  }
  return false;
  // Silence warnings.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}
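
// createFastISel - Entry point used by the common FastISel machinery to
// construct the AArch64-specific FastISel object.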
namespace llvm {
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
                                        const TargetLibraryInfo *libInfo) {
  return new AArch64FastISel(funcInfo, libInfo);
}
}