lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44
  45   class Address {
  46   public:
  47     typedef enum {
  48       RegBase,
  49       FrameIndexBase
  50     } BaseKind;
  51
  52   private:
  53     BaseKind Kind;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     int64_t Offset;
  59     const GlobalValue *GV;
  60
  61   public:
  62     Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
  63     void setKind(BaseKind K) { Kind = K; }
  64     BaseKind getKind() const { return Kind; }
  65     bool isRegBase() const { return Kind == RegBase; }
  66     bool isFIBase() const { return Kind == FrameIndexBase; }
  67     void setReg(unsigned Reg) {
  68       assert(isRegBase() && "Invalid base register access!");
  69       Base.Reg = Reg;
  70     }
  71     unsigned getReg() const {
  72       assert(isRegBase() && "Invalid base register access!");
  73       return Base.Reg;
  74     }
  75     void setFI(unsigned FI) {
  76       assert(isFIBase() && "Invalid base frame index  access!");
  77       Base.FI = FI;
  78     }
  79     unsigned getFI() const {
  80       assert(isFIBase() && "Invalid base frame index access!");
  81       return Base.FI;
  82     }
  83     void setOffset(int64_t O) { Offset = O; }
  84     int64_t getOffset() { return Offset; }
  85
  86     void setGlobalValue(const GlobalValue *G) { GV = G; }
  87     const GlobalValue *getGlobalValue() { return GV; }
  88
  89     bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
  90   };
  91
  92   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  93   /// make the right decision when generating code for different targets.
  94   const AArch64Subtarget *Subtarget;
  95   LLVMContext *Context;
  96
  97   bool FastLowerCall(CallLoweringInfo &CLI) override;
  98   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
  99
 100 private:
 101   // Selection routines.
 102   bool SelectLoad(const Instruction *I);
 103   bool SelectStore(const Instruction *I);
 104   bool SelectBranch(const Instruction *I);
 105   bool SelectIndirectBr(const Instruction *I);
 106   bool SelectCmp(const Instruction *I);
 107   bool SelectSelect(const Instruction *I);
 108   bool SelectFPExt(const Instruction *I);
 109   bool SelectFPTrunc(const Instruction *I);
 110   bool SelectFPToInt(const Instruction *I, bool Signed);
 111   bool SelectIntToFP(const Instruction *I, bool Signed);
 112   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 113   bool SelectRet(const Instruction *I);
 114   bool SelectTrunc(const Instruction *I);
 115   bool SelectIntExt(const Instruction *I);
 116   bool SelectMul(const Instruction *I);
 117   bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
 118   bool SelectBitCast(const Instruction *I);
 119
 120   // Utility helper routines.
 121   bool isTypeLegal(Type *Ty, MVT &VT);
 122   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 123   bool ComputeAddress(const Value *Obj, Address &Addr);
 124   bool ComputeCallAddress(const Value *V, Address &Addr);
 125   bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
 126                        bool UseUnscaled);
 127   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 128                             unsigned Flags, bool UseUnscaled);
 129   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 130   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 131                           unsigned Alignment);
 132   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 133                          const Value *Cond);
 134
 135   // Emit functions.
 136   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
 137   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 138                 bool UseUnscaled = false);
 139   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 140                  bool UseUnscaled = false);
 141   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 142   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 143   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 144                        unsigned Op1, bool Op1IsKill);
 145   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 146                          unsigned Op1, bool Op1IsKill);
 147   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 148                          unsigned Op1, bool Op1IsKill);
 149   unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 150   unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 151   unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 152
 153   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 154   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 155
 156   // Call handling routines.
 157 private:
 158   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 159   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 160                        unsigned &NumBytes);
 161   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 162
 163 public:
 164   // Backend specific FastISel code.
 165   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 166   unsigned TargetMaterializeConstant(const Constant *C) override;
 167
 168   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 169                          const TargetLibraryInfo *libInfo)
 170       : FastISel(funcInfo, libInfo) {
 171     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 172     Context = &funcInfo.Fn->getContext();
 173   }
 174
 175   bool TargetSelectInstruction(const Instruction *I) override;
 176
 177 #include "AArch64GenFastISel.inc"
 178 };
 179
 180 } // end anonymous namespace
 181
 182 #include "AArch64GenCallingConv.inc"
 183
 184 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 185   if (CC == CallingConv::WebKit_JS)
 186     return CC_AArch64_WebKit_JS;
 187   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 188 }
 189
 190 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 191   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 192          "Alloca should always return a pointer.");
 193
 194   // Don't handle dynamic allocas.
 195   if (!FuncInfo.StaticAllocaMap.count(AI))
 196     return 0;
 197
 198   DenseMap<const AllocaInst *, int>::iterator SI =
 199       FuncInfo.StaticAllocaMap.find(AI);
 200
 201   if (SI != FuncInfo.StaticAllocaMap.end()) {
 202     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 203     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 204             ResultReg)
 205         .addFrameIndex(SI->second)
 206         .addImm(0)
 207         .addImm(0);
 208     return ResultReg;
 209   }
 210
 211   return 0;
 212 }
 213
 214 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 215   if (VT != MVT::f32 && VT != MVT::f64)
 216     return 0;
 217
 218   const APFloat Val = CFP->getValueAPF();
 219   bool is64bit = (VT == MVT::f64);
 220
 221   // This checks to see if we can use FMOV instructions to materialize
 222   // a constant, otherwise we have to materialize via the constant pool.
 223   if (TLI.isFPImmLegal(Val, VT)) {
 224     int Imm;
 225     unsigned Opc;
 226     if (is64bit) {
 227       Imm = AArch64_AM::getFP64Imm(Val);
 228       Opc = AArch64::FMOVDi;
 229     } else {
 230       Imm = AArch64_AM::getFP32Imm(Val);
 231       Opc = AArch64::FMOVSi;
 232     }
 233     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 234     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 235         .addImm(Imm);
 236     return ResultReg;
 237   }
 238
 239   // Materialize via constant pool.  MachineConstantPool wants an explicit
 240   // alignment.
 241   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 242   if (Align == 0)
 243     Align = DL.getTypeAllocSize(CFP->getType());
 244
 245   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 246   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 247   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 248           ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
 249
 250   unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
 251   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 252   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 253       .addReg(ADRPReg)
 254       .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 255   return ResultReg;
 256 }
 257
 258 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 259   // We can't handle thread-local variables quickly yet.
 260   if (GV->isThreadLocal())
 261     return 0;
 262
 263   // MachO still uses GOT for large code-model accesses, but ELF requires
 264   // movz/movk sequences, which FastISel doesn't handle yet.
 265   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 266     return 0;
 267
 268   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 269
 270   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 271   if (!DestEVT.isSimple())
 272     return 0;
 273
 274   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 275   unsigned ResultReg;
 276
 277   if (OpFlags & AArch64II::MO_GOT) {
 278     // ADRP + LDRX
 279     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 280             ADRPReg)
 281         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 282
 283     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 284     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 285             ResultReg)
 286         .addReg(ADRPReg)
 287         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 288                           AArch64II::MO_NC);
 289   } else {
 290     // ADRP + ADDX
 291     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 292             ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 293
 294     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 295     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 296             ResultReg)
 297         .addReg(ADRPReg)
 298         .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 299         .addImm(0);
 300   }
 301   return ResultReg;
 302 }
 303
 304 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 305   EVT CEVT = TLI.getValueType(C->getType(), true);
 306
 307   // Only handle simple types.
 308   if (!CEVT.isSimple())
 309     return 0;
 310   MVT VT = CEVT.getSimpleVT();
 311
 312   // FIXME: Handle ConstantInt.
 313   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 314     return AArch64MaterializeFP(CFP, VT);
 315   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 316     return AArch64MaterializeGV(GV);
 317
 318   return 0;
 319 }
 320
 321 // Computes the address to get to an object.
 322 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
 323   const User *U = nullptr;
 324   unsigned Opcode = Instruction::UserOp1;
 325   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 326     // Don't walk into other basic blocks unless the object is an alloca from
 327     // another block, otherwise it may not have a virtual register assigned.
 328     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 329         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 330       Opcode = I->getOpcode();
 331       U = I;
 332     }
 333   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 334     Opcode = C->getOpcode();
 335     U = C;
 336   }
 337
 338   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 339     if (Ty->getAddressSpace() > 255)
 340       // Fast instruction selection doesn't support the special
 341       // address spaces.
 342       return false;
 343
 344   switch (Opcode) {
 345   default:
 346     break;
 347   case Instruction::BitCast: {
 348     // Look through bitcasts.
 349     return ComputeAddress(U->getOperand(0), Addr);
 350   }
 351   case Instruction::IntToPtr: {
 352     // Look past no-op inttoptrs.
 353     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 354       return ComputeAddress(U->getOperand(0), Addr);
 355     break;
 356   }
 357   case Instruction::PtrToInt: {
 358     // Look past no-op ptrtoints.
 359     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 360       return ComputeAddress(U->getOperand(0), Addr);
 361     break;
 362   }
 363   case Instruction::GetElementPtr: {
 364     Address SavedAddr = Addr;
 365     uint64_t TmpOffset = Addr.getOffset();
 366
 367     // Iterate through the GEP folding the constants into offsets where
 368     // we can.
 369     gep_type_iterator GTI = gep_type_begin(U);
 370     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 371          ++i, ++GTI) {
 372       const Value *Op = *i;
 373       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 374         const StructLayout *SL = DL.getStructLayout(STy);
 375         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 376         TmpOffset += SL->getElementOffset(Idx);
 377       } else {
 378         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 379         for (;;) {
 380           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 381             // Constant-offset addressing.
 382             TmpOffset += CI->getSExtValue() * S;
 383             break;
 384           }
 385           if (canFoldAddIntoGEP(U, Op)) {
 386             // A compatible add with a constant operand. Fold the constant.
 387             ConstantInt *CI =
 388                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 389             TmpOffset += CI->getSExtValue() * S;
 390             // Iterate on the other operand.
 391             Op = cast<AddOperator>(Op)->getOperand(0);
 392             continue;
 393           }
 394           // Unsupported
 395           goto unsupported_gep;
 396         }
 397       }
 398     }
 399
 400     // Try to grab the base operand now.
 401     Addr.setOffset(TmpOffset);
 402     if (ComputeAddress(U->getOperand(0), Addr))
 403       return true;
 404
 405     // We failed, restore everything and try the other options.
 406     Addr = SavedAddr;
 407
 408   unsupported_gep:
 409     break;
 410   }
 411   case Instruction::Alloca: {
 412     const AllocaInst *AI = cast<AllocaInst>(Obj);
 413     DenseMap<const AllocaInst *, int>::iterator SI =
 414         FuncInfo.StaticAllocaMap.find(AI);
 415     if (SI != FuncInfo.StaticAllocaMap.end()) {
 416       Addr.setKind(Address::FrameIndexBase);
 417       Addr.setFI(SI->second);
 418       return true;
 419     }
 420     break;
 421   }
 422   case Instruction::Add:
 423     // Adds of constants are common and easy enough.
 424     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 425       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 426       return ComputeAddress(U->getOperand(0), Addr);
 427     }
 428     break;
 429   }
 430
 431   // Try to get this in a register if nothing else has worked.
 432   if (!Addr.isValid())
 433     Addr.setReg(getRegForValue(Obj));
 434   return Addr.isValid();
 435 }
 436
 437 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 438   const User *U = nullptr;
 439   unsigned Opcode = Instruction::UserOp1;
 440   bool InMBB = true;
 441
 442   if (const auto *I = dyn_cast<Instruction>(V)) {
 443     Opcode = I->getOpcode();
 444     U = I;
 445     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 446   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 447     Opcode = C->getOpcode();
 448     U = C;
 449   }
 450
 451   switch (Opcode) {
 452   default: break;
 453   case Instruction::BitCast:
 454     // Look past bitcasts if its operand is in the same BB.
 455     if (InMBB)
 456       return ComputeCallAddress(U->getOperand(0), Addr);
 457     break;
 458   case Instruction::IntToPtr:
 459     // Look past no-op inttoptrs if its operand is in the same BB.
 460     if (InMBB &&
 461         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 462       return ComputeCallAddress(U->getOperand(0), Addr);
 463     break;
 464   case Instruction::PtrToInt:
 465     // Look past no-op ptrtoints if its operand is in the same BB.
 466     if (InMBB &&
 467         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 468       return ComputeCallAddress(U->getOperand(0), Addr);
 469     break;
 470   }
 471
 472   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 473     Addr.setGlobalValue(GV);
 474     return true;
 475   }
 476
 477   // If all else fails, try to materialize the value in a register.
 478   if (!Addr.getGlobalValue()) {
 479     Addr.setReg(getRegForValue(V));
 480     return Addr.getReg() != 0;
 481   }
 482
 483   return false;
 484 }
 485
 486
 487 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 488   EVT evt = TLI.getValueType(Ty, true);
 489
 490   // Only handle simple types.
 491   if (evt == MVT::Other || !evt.isSimple())
 492     return false;
 493   VT = evt.getSimpleVT();
 494
 495   // This is a legal type, but it's not something we handle in fast-isel.
 496   if (VT == MVT::f128)
 497     return false;
 498
 499   // Handle all other legal types, i.e. a register that will directly hold this
 500   // value.
 501   return TLI.isTypeLegal(VT);
 502 }
 503
 504 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 505   if (isTypeLegal(Ty, VT))
 506     return true;
 507
 508   // If this is a type than can be sign or zero-extended to a basic operation
 509   // go ahead and accept it now. For stores, this reflects truncation.
 510   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 511     return true;
 512
 513   return false;
 514 }
 515
 516 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
 517                                       int64_t ScaleFactor, bool UseUnscaled) {
 518   bool needsLowering = false;
 519   int64_t Offset = Addr.getOffset();
 520   switch (VT.SimpleTy) {
 521   default:
 522     return false;
 523   case MVT::i1:
 524   case MVT::i8:
 525   case MVT::i16:
 526   case MVT::i32:
 527   case MVT::i64:
 528   case MVT::f32:
 529   case MVT::f64:
 530     if (!UseUnscaled)
 531       // Using scaled, 12-bit, unsigned immediate offsets.
 532       needsLowering = ((Offset & 0xfff) != Offset);
 533     else
 534       // Using unscaled, 9-bit, signed immediate offsets.
 535       needsLowering = (Offset > 256 || Offset < -256);
 536     break;
 537   }
 538
 539   //If this is a stack pointer and the offset needs to be simplified then put
 540   // the alloca address into a register, set the base type back to register and
 541   // continue. This should almost never happen.
 542   if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
 543     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 544     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 545             ResultReg)
 546         .addFrameIndex(Addr.getFI())
 547         .addImm(0)
 548         .addImm(0);
 549     Addr.setKind(Address::RegBase);
 550     Addr.setReg(ResultReg);
 551   }
 552
 553   // Since the offset is too large for the load/store instruction get the
 554   // reg+offset into a register.
 555   if (needsLowering) {
 556     uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
 557     unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
 558                                       UnscaledOffset, MVT::i64);
 559     if (ResultReg == 0)
 560       return false;
 561     Addr.setReg(ResultReg);
 562     Addr.setOffset(0);
 563   }
 564   return true;
 565 }
 566
 567 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 568                                            const MachineInstrBuilder &MIB,
 569                                            unsigned Flags, bool UseUnscaled) {
 570   int64_t Offset = Addr.getOffset();
 571   // Frame base works a bit differently. Handle it separately.
 572   if (Addr.getKind() == Address::FrameIndexBase) {
 573     int FI = Addr.getFI();
 574     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 575     // and alignment should be based on the VT.
 576     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
 577         MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 578         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 579     // Now add the rest of the operands.
 580     MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 581   } else {
 582     // Now add the rest of the operands.
 583     MIB.addReg(Addr.getReg());
 584     MIB.addImm(Offset);
 585   }
 586 }
 587
 588 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 589                                bool UseUnscaled) {
 590   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 591   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 592   if (!UseUnscaled && Addr.getOffset() < 0)
 593     UseUnscaled = true;
 594
 595   unsigned Opc;
 596   const TargetRegisterClass *RC;
 597   bool VTIsi1 = false;
 598   int64_t ScaleFactor = 0;
 599   switch (VT.SimpleTy) {
 600   default:
 601     return false;
 602   case MVT::i1:
 603     VTIsi1 = true;
 604   // Intentional fall-through.
 605   case MVT::i8:
 606     Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
 607     RC = &AArch64::GPR32RegClass;
 608     ScaleFactor = 1;
 609     break;
 610   case MVT::i16:
 611     Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
 612     RC = &AArch64::GPR32RegClass;
 613     ScaleFactor = 2;
 614     break;
 615   case MVT::i32:
 616     Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
 617     RC = &AArch64::GPR32RegClass;
 618     ScaleFactor = 4;
 619     break;
 620   case MVT::i64:
 621     Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
 622     RC = &AArch64::GPR64RegClass;
 623     ScaleFactor = 8;
 624     break;
 625   case MVT::f32:
 626     Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
 627     RC = TLI.getRegClassFor(VT);
 628     ScaleFactor = 4;
 629     break;
 630   case MVT::f64:
 631     Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
 632     RC = TLI.getRegClassFor(VT);
 633     ScaleFactor = 8;
 634     break;
 635   }
 636   // Scale the offset.
 637   if (!UseUnscaled) {
 638     int64_t Offset = Addr.getOffset();
 639     if (Offset & (ScaleFactor - 1))
 640       // Retry using an unscaled, 9-bit, signed immediate offset.
 641       return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
 642
 643     Addr.setOffset(Offset / ScaleFactor);
 644   }
 645
 646   // Simplify this down to something we can handle.
 647   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 648     return false;
 649
 650   // Create the base instruction, then add the operands.
 651   ResultReg = createResultReg(RC);
 652   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 653                                     TII.get(Opc), ResultReg);
 654   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
 655
 656   // Loading an i1 requires special handling.
 657   if (VTIsi1) {
 658     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
 659     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 660     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 661             ANDReg)
 662         .addReg(ResultReg)
 663         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 664     ResultReg = ANDReg;
 665   }
 666   return true;
 667 }
 668
 669 bool AArch64FastISel::SelectLoad(const Instruction *I) {
 670   MVT VT;
 671   // Verify we have a legal type before going any further.  Currently, we handle
 672   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 673   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 674   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
 675     return false;
 676
 677   // See if we can handle this address.
 678   Address Addr;
 679   if (!ComputeAddress(I->getOperand(0), Addr))
 680     return false;
 681
 682   unsigned ResultReg;
 683   if (!EmitLoad(VT, ResultReg, Addr))
 684     return false;
 685
 686   UpdateValueMap(I, ResultReg);
 687   return true;
 688 }
 689
 690 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 691                                 bool UseUnscaled) {
 692   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 693   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 694   if (!UseUnscaled && Addr.getOffset() < 0)
 695     UseUnscaled = true;
 696
 697   unsigned StrOpc;
 698   bool VTIsi1 = false;
 699   int64_t ScaleFactor = 0;
 700   // Using scaled, 12-bit, unsigned immediate offsets.
 701   switch (VT.SimpleTy) {
 702   default:
 703     return false;
 704   case MVT::i1:
 705     VTIsi1 = true;
 706   case MVT::i8:
 707     StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
 708     ScaleFactor = 1;
 709     break;
 710   case MVT::i16:
 711     StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
 712     ScaleFactor = 2;
 713     break;
 714   case MVT::i32:
 715     StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
 716     ScaleFactor = 4;
 717     break;
 718   case MVT::i64:
 719     StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
 720     ScaleFactor = 8;
 721     break;
 722   case MVT::f32:
 723     StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
 724     ScaleFactor = 4;
 725     break;
 726   case MVT::f64:
 727     StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
 728     ScaleFactor = 8;
 729     break;
 730   }
 731   // Scale the offset.
 732   if (!UseUnscaled) {
 733     int64_t Offset = Addr.getOffset();
 734     if (Offset & (ScaleFactor - 1))
 735       // Retry using an unscaled, 9-bit, signed immediate offset.
 736       return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
 737
 738     Addr.setOffset(Offset / ScaleFactor);
 739   }
 740
 741   // Simplify this down to something we can handle.
 742   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 743     return false;
 744
 745   // Storing an i1 requires special handling.
 746   if (VTIsi1) {
 747     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
 748     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 749     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 750             ANDReg)
 751         .addReg(SrcReg)
 752         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 753     SrcReg = ANDReg;
 754   }
 755   // Create the base instruction, then add the operands.
 756   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 757                                     TII.get(StrOpc)).addReg(SrcReg);
 758   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
 759   return true;
 760 }
 761
 762 bool AArch64FastISel::SelectStore(const Instruction *I) {
 763   MVT VT;
 764   Value *Op0 = I->getOperand(0);
 765   // Verify we have a legal type before going any further.  Currently, we handle
 766   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 767   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 768   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
 769       cast<StoreInst>(I)->isAtomic())
 770     return false;
 771
 772   // Get the value to be stored into a register.
 773   unsigned SrcReg = getRegForValue(Op0);
 774   if (SrcReg == 0)
 775     return false;
 776
 777   // See if we can handle this address.
 778   Address Addr;
 779   if (!ComputeAddress(I->getOperand(1), Addr))
 780     return false;
 781
 782   if (!EmitStore(VT, SrcReg, Addr))
 783     return false;
 784   return true;
 785 }
 786
 787 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 788   switch (Pred) {
 789   case CmpInst::FCMP_ONE:
 790   case CmpInst::FCMP_UEQ:
 791   default:
 792     // AL is our "false" for now. The other two need more compares.
 793     return AArch64CC::AL;
 794   case CmpInst::ICMP_EQ:
 795   case CmpInst::FCMP_OEQ:
 796     return AArch64CC::EQ;
 797   case CmpInst::ICMP_SGT:
 798   case CmpInst::FCMP_OGT:
 799     return AArch64CC::GT;
 800   case CmpInst::ICMP_SGE:
 801   case CmpInst::FCMP_OGE:
 802     return AArch64CC::GE;
 803   case CmpInst::ICMP_UGT:
 804   case CmpInst::FCMP_UGT:
 805     return AArch64CC::HI;
 806   case CmpInst::FCMP_OLT:
 807     return AArch64CC::MI;
 808   case CmpInst::ICMP_ULE:
 809   case CmpInst::FCMP_OLE:
 810     return AArch64CC::LS;
 811   case CmpInst::FCMP_ORD:
 812     return AArch64CC::VC;
 813   case CmpInst::FCMP_UNO:
 814     return AArch64CC::VS;
 815   case CmpInst::FCMP_UGE:
 816     return AArch64CC::PL;
 817   case CmpInst::ICMP_SLT:
 818   case CmpInst::FCMP_ULT:
 819     return AArch64CC::LT;
 820   case CmpInst::ICMP_SLE:
 821   case CmpInst::FCMP_ULE:
 822     return AArch64CC::LE;
 823   case CmpInst::FCMP_UNE:
 824   case CmpInst::ICMP_NE:
 825     return AArch64CC::NE;
 826   case CmpInst::ICMP_UGE:
 827     return AArch64CC::HS;
 828   case CmpInst::ICMP_ULT:
 829     return AArch64CC::LO;
 830   }
 831 }
 832
 833 bool AArch64FastISel::SelectBranch(const Instruction *I) {
 834   const BranchInst *BI = cast<BranchInst>(I);
 835   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 836   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 837
 838   AArch64CC::CondCode CC = AArch64CC::NE;
 839   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
 840     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
 841       // We may not handle every CC for now.
 842       CC = getCompareCC(CI->getPredicate());
 843       if (CC == AArch64CC::AL)
 844         return false;
 845
 846       // Emit the cmp.
 847       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
 848         return false;
 849
 850       // Emit the branch.
 851       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 852           .addImm(CC)
 853           .addMBB(TBB);
 854
 855       // Obtain the branch weight and add the TrueBB to the successor list.
 856       uint32_t BranchWeight = 0;
 857       if (FuncInfo.BPI)
 858         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 859                                                   TBB->getBasicBlock());
 860       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 861
 862       FastEmitBranch(FBB, DbgLoc);
 863       return true;
 864     }
 865   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
 866     MVT SrcVT;
 867     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
 868         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
 869       unsigned CondReg = getRegForValue(TI->getOperand(0));
 870       if (CondReg == 0)
 871         return false;
 872
 873       // Issue an extract_subreg to get the lower 32-bits.
 874       if (SrcVT == MVT::i64)
 875         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
 876                                              AArch64::sub_32);
 877
 878       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
 879       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 880       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 881               TII.get(AArch64::ANDWri), ANDReg)
 882           .addReg(CondReg)
 883           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 884       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 885               TII.get(AArch64::SUBSWri))
 886           .addReg(ANDReg)
 887           .addReg(ANDReg)
 888           .addImm(0)
 889           .addImm(0);
 890
 891       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 892         std::swap(TBB, FBB);
 893         CC = AArch64CC::EQ;
 894       }
 895       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 896           .addImm(CC)
 897           .addMBB(TBB);
 898
 899       // Obtain the branch weight and add the TrueBB to the successor list.
 900       uint32_t BranchWeight = 0;
 901       if (FuncInfo.BPI)
 902         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 903                                                   TBB->getBasicBlock());
 904       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 905
 906       FastEmitBranch(FBB, DbgLoc);
 907       return true;
 908     }
 909   } else if (const ConstantInt *CI =
 910                  dyn_cast<ConstantInt>(BI->getCondition())) {
 911     uint64_t Imm = CI->getZExtValue();
 912     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
 913     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
 914         .addMBB(Target);
 915
 916     // Obtain the branch weight and add the target to the successor list.
 917     uint32_t BranchWeight = 0;
 918     if (FuncInfo.BPI)
 919       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 920                                                  Target->getBasicBlock());
 921     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
 922     return true;
 923   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
 924     // Fake request the condition, otherwise the intrinsic might be completely
 925     // optimized away.
 926     unsigned CondReg = getRegForValue(BI->getCondition());
 927     if (!CondReg)
 928       return false;
 929
 930     // Emit the branch.
 931     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 932       .addImm(CC)
 933       .addMBB(TBB);
 934
 935     // Obtain the branch weight and add the TrueBB to the successor list.
 936     uint32_t BranchWeight = 0;
 937     if (FuncInfo.BPI)
 938       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 939                                                  TBB->getBasicBlock());
 940     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 941
 942     FastEmitBranch(FBB, DbgLoc);
 943     return true;
 944   }
 945
 946   unsigned CondReg = getRegForValue(BI->getCondition());
 947   if (CondReg == 0)
 948     return false;
 949
 950   // We've been divorced from our compare!  Our block was split, and
 951   // now our compare lives in a predecessor block.  We musn't
 952   // re-compare here, as the children of the compare aren't guaranteed
 953   // live across the block boundary (we *could* check for this).
 954   // Regardless, the compare has been done in the predecessor block,
 955   // and it left a value for us in a virtual register.  Ergo, we test
 956   // the one-bit value left in the virtual register.
 957   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
 958           AArch64::WZR)
 959       .addReg(CondReg)
 960       .addImm(0)
 961       .addImm(0);
 962
 963   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 964     std::swap(TBB, FBB);
 965     CC = AArch64CC::EQ;
 966   }
 967
 968   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 969       .addImm(CC)
 970       .addMBB(TBB);
 971
 972   // Obtain the branch weight and add the TrueBB to the successor list.
 973   uint32_t BranchWeight = 0;
 974   if (FuncInfo.BPI)
 975     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 976                                                TBB->getBasicBlock());
 977   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 978
 979   FastEmitBranch(FBB, DbgLoc);
 980   return true;
 981 }
 982
 983 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
 984   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
 985   unsigned AddrReg = getRegForValue(BI->getOperand(0));
 986   if (AddrReg == 0)
 987     return false;
 988
 989   // Emit the indirect branch.
 990   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
 991       .addReg(AddrReg);
 992
 993   // Make sure the CFG is up-to-date.
 994   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
 995     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
 996
 997   return true;
 998 }
 999
1000 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
1001   Type *Ty = Src1Value->getType();
1002   EVT SrcEVT = TLI.getValueType(Ty, true);
1003   if (!SrcEVT.isSimple())
1004     return false;
1005   MVT SrcVT = SrcEVT.getSimpleVT();
1006
1007   // Check to see if the 2nd operand is a constant that we can encode directly
1008   // in the compare.
1009   uint64_t Imm;
1010   bool UseImm = false;
1011   bool isNegativeImm = false;
1012   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1013     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1014         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1015       const APInt &CIVal = ConstInt->getValue();
1016
1017       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1018       if (CIVal.isNegative()) {
1019         isNegativeImm = true;
1020         Imm = -Imm;
1021       }
1022       // FIXME: We can handle more immediates using shifts.
1023       UseImm = ((Imm & 0xfff) == Imm);
1024     }
1025   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1026     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1027       if (ConstFP->isZero() && !ConstFP->isNegative())
1028         UseImm = true;
1029   }
1030
1031   unsigned ZReg;
1032   unsigned CmpOpc;
1033   bool isICmp = true;
1034   bool needsExt = false;
1035   switch (SrcVT.SimpleTy) {
1036   default:
1037     return false;
1038   case MVT::i1:
1039   case MVT::i8:
1040   case MVT::i16:
1041     needsExt = true;
1042   // Intentional fall-through.
1043   case MVT::i32:
1044     ZReg = AArch64::WZR;
1045     if (UseImm)
1046       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1047     else
1048       CmpOpc = AArch64::SUBSWrr;
1049     break;
1050   case MVT::i64:
1051     ZReg = AArch64::XZR;
1052     if (UseImm)
1053       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1054     else
1055       CmpOpc = AArch64::SUBSXrr;
1056     break;
1057   case MVT::f32:
1058     isICmp = false;
1059     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1060     break;
1061   case MVT::f64:
1062     isICmp = false;
1063     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1064     break;
1065   }
1066
1067   unsigned SrcReg1 = getRegForValue(Src1Value);
1068   if (SrcReg1 == 0)
1069     return false;
1070
1071   unsigned SrcReg2;
1072   if (!UseImm) {
1073     SrcReg2 = getRegForValue(Src2Value);
1074     if (SrcReg2 == 0)
1075       return false;
1076   }
1077
1078   // We have i1, i8, or i16, we need to either zero extend or sign extend.
1079   if (needsExt) {
1080     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1081     if (SrcReg1 == 0)
1082       return false;
1083     if (!UseImm) {
1084       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1085       if (SrcReg2 == 0)
1086         return false;
1087     }
1088   }
1089
1090   if (isICmp) {
1091     if (UseImm)
1092       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1093           .addReg(ZReg)
1094           .addReg(SrcReg1)
1095           .addImm(Imm)
1096           .addImm(0);
1097     else
1098       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1099           .addReg(ZReg)
1100           .addReg(SrcReg1)
1101           .addReg(SrcReg2);
1102   } else {
1103     if (UseImm)
1104       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1105           .addReg(SrcReg1);
1106     else
1107       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1108           .addReg(SrcReg1)
1109           .addReg(SrcReg2);
1110   }
1111   return true;
1112 }
1113
1114 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1115   const CmpInst *CI = cast<CmpInst>(I);
1116
1117   // We may not handle every CC for now.
1118   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1119   if (CC == AArch64CC::AL)
1120     return false;
1121
1122   // Emit the cmp.
1123   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1124     return false;
1125
1126   // Now set a register based on the comparison.
1127   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1128   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1129   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1130           ResultReg)
1131       .addReg(AArch64::WZR)
1132       .addReg(AArch64::WZR)
1133       .addImm(invertedCC);
1134
1135   UpdateValueMap(I, ResultReg);
1136   return true;
1137 }
1138
1139 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1140   const SelectInst *SI = cast<SelectInst>(I);
1141
1142   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1143   if (!DestEVT.isSimple())
1144     return false;
1145
1146   MVT DestVT = DestEVT.getSimpleVT();
1147   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1148       DestVT != MVT::f64)
1149     return false;
1150
1151   unsigned SelectOpc;
1152   switch (DestVT.SimpleTy) {
1153   default: return false;
1154   case MVT::i32: SelectOpc = AArch64::CSELWr;    break;
1155   case MVT::i64: SelectOpc = AArch64::CSELXr;    break;
1156   case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1157   case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1158   }
1159
1160   const Value *Cond = SI->getCondition();
1161   bool NeedTest = true;
1162   AArch64CC::CondCode CC = AArch64CC::NE;
1163   if (foldXALUIntrinsic(CC, I, Cond))
1164     NeedTest = false;
1165
1166   unsigned CondReg = getRegForValue(Cond);
1167   if (!CondReg)
1168     return false;
1169   bool CondIsKill = hasTrivialKill(Cond);
1170
1171   if (NeedTest) {
1172     MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1173     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1174     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1175             ANDReg)
1176       .addReg(CondReg, getKillRegState(CondIsKill))
1177       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1178
1179     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1180       .addReg(ANDReg)
1181       .addReg(ANDReg)
1182       .addImm(0)
1183       .addImm(0);
1184   }
1185
1186   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1187   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1188
1189   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1190   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1191
1192   if (!TrueReg || !FalseReg)
1193     return false;
1194
1195   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1196   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1197           ResultReg)
1198     .addReg(TrueReg, getKillRegState(TrueIsKill))
1199     .addReg(FalseReg, getKillRegState(FalseIsKill))
1200     .addImm(CC);
1201
1202   UpdateValueMap(I, ResultReg);
1203   return true;
1204 }
1205
1206 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1207   Value *V = I->getOperand(0);
1208   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1209     return false;
1210
1211   unsigned Op = getRegForValue(V);
1212   if (Op == 0)
1213     return false;
1214
1215   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1216   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1217           ResultReg).addReg(Op);
1218   UpdateValueMap(I, ResultReg);
1219   return true;
1220 }
1221
1222 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1223   Value *V = I->getOperand(0);
1224   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1225     return false;
1226
1227   unsigned Op = getRegForValue(V);
1228   if (Op == 0)
1229     return false;
1230
1231   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1232   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1233           ResultReg).addReg(Op);
1234   UpdateValueMap(I, ResultReg);
1235   return true;
1236 }
1237
1238 // FPToUI and FPToSI
1239 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1240   MVT DestVT;
1241   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1242     return false;
1243
1244   unsigned SrcReg = getRegForValue(I->getOperand(0));
1245   if (SrcReg == 0)
1246     return false;
1247
1248   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1249   if (SrcVT == MVT::f128)
1250     return false;
1251
1252   unsigned Opc;
1253   if (SrcVT == MVT::f64) {
1254     if (Signed)
1255       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1256     else
1257       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1258   } else {
1259     if (Signed)
1260       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1261     else
1262       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1263   }
1264   unsigned ResultReg = createResultReg(
1265       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1266   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1267       .addReg(SrcReg);
1268   UpdateValueMap(I, ResultReg);
1269   return true;
1270 }
1271
1272 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1273   MVT DestVT;
1274   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1275     return false;
1276   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1277           "Unexpected value type.");
1278
1279   unsigned SrcReg = getRegForValue(I->getOperand(0));
1280   if (SrcReg == 0)
1281     return false;
1282
1283   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1284
1285   // Handle sign-extension.
1286   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1287     SrcReg =
1288         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1289     if (SrcReg == 0)
1290       return false;
1291   }
1292
1293   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1294                                                   : &AArch64::GPR32RegClass);
1295
1296   unsigned Opc;
1297   if (SrcVT == MVT::i64) {
1298     if (Signed)
1299       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1300     else
1301       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1302   } else {
1303     if (Signed)
1304       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1305     else
1306       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1307   }
1308
1309   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1310   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1311       .addReg(SrcReg);
1312   UpdateValueMap(I, ResultReg);
1313   return true;
1314 }
1315
1316 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1317                                       SmallVectorImpl<MVT> &OutVTs,
1318                                       unsigned &NumBytes) {
1319   CallingConv::ID CC = CLI.CallConv;
1320   SmallVector<CCValAssign, 16> ArgLocs;
1321   CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
1322   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1323
1324   // Get a count of how many bytes are to be pushed on the stack.
1325   NumBytes = CCInfo.getNextStackOffset();
1326
1327   // Issue CALLSEQ_START
1328   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1329   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1330     .addImm(NumBytes);
1331
1332   // Process the args.
1333   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1334     CCValAssign &VA = ArgLocs[i];
1335     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1336     MVT ArgVT = OutVTs[VA.getValNo()];
1337
1338     unsigned ArgReg = getRegForValue(ArgVal);
1339     if (!ArgReg)
1340       return false;
1341
1342     // Handle arg promotion: SExt, ZExt, AExt.
1343     switch (VA.getLocInfo()) {
1344     case CCValAssign::Full:
1345       break;
1346     case CCValAssign::SExt: {
1347       MVT DestVT = VA.getLocVT();
1348       MVT SrcVT = ArgVT;
1349       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1350       if (!ArgReg)
1351         return false;
1352       break;
1353     }
1354     case CCValAssign::AExt:
1355     // Intentional fall-through.
1356     case CCValAssign::ZExt: {
1357       MVT DestVT = VA.getLocVT();
1358       MVT SrcVT = ArgVT;
1359       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1360       if (!ArgReg)
1361         return false;
1362       break;
1363     }
1364     default:
1365       llvm_unreachable("Unknown arg promotion!");
1366     }
1367
1368     // Now copy/store arg to correct locations.
1369     if (VA.isRegLoc() && !VA.needsCustom()) {
1370       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1371               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1372       CLI.OutRegs.push_back(VA.getLocReg());
1373     } else if (VA.needsCustom()) {
1374       // FIXME: Handle custom args.
1375       return false;
1376     } else {
1377       assert(VA.isMemLoc() && "Assuming store on stack.");
1378
1379       // Don't emit stores for undef values.
1380       if (isa<UndefValue>(ArgVal))
1381         continue;
1382
1383       // Need to store on the stack.
1384       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1385
1386       unsigned BEAlign = 0;
1387       if (ArgSize < 8 && !Subtarget->isLittleEndian())
1388         BEAlign = 8 - ArgSize;
1389
1390       Address Addr;
1391       Addr.setKind(Address::RegBase);
1392       Addr.setReg(AArch64::SP);
1393       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1394
1395       if (!EmitStore(ArgVT, ArgReg, Addr))
1396         return false;
1397     }
1398   }
1399   return true;
1400 }
1401
1402 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1403                                  unsigned NumBytes) {
1404   CallingConv::ID CC = CLI.CallConv;
1405
1406   // Issue CALLSEQ_END
1407   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1408   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1409     .addImm(NumBytes).addImm(0);
1410
1411   // Now the return value.
1412   if (RetVT != MVT::isVoid) {
1413     SmallVector<CCValAssign, 16> RVLocs;
1414     CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
1415     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1416
1417     // Only handle a single return value.
1418     if (RVLocs.size() != 1)
1419       return false;
1420
1421     // Copy all of the result registers out of their specified physreg.
1422     MVT CopyVT = RVLocs[0].getValVT();
1423     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1424     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1425             TII.get(TargetOpcode::COPY), ResultReg)
1426       .addReg(RVLocs[0].getLocReg());
1427     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1428
1429     CLI.ResultReg = ResultReg;
1430     CLI.NumResultRegs = 1;
1431   }
1432
1433   return true;
1434 }
1435
1436 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1437   CallingConv::ID CC  = CLI.CallConv;
1438   bool IsVarArg       = CLI.IsVarArg;
1439   const Value *Callee = CLI.Callee;
1440   const char *SymName = CLI.SymName;
1441
1442   CodeModel::Model CM = TM.getCodeModel();
1443   // Only support the small and large code model.
1444   if (CM != CodeModel::Small && CM != CodeModel::Large)
1445     return false;
1446
1447   // FIXME: Add large code model support for ELF.
1448   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
1449     return false;
1450
1451   // Let SDISel handle vararg functions.
1452   if (IsVarArg)
1453     return false;
1454
1455   // FIXME: Only handle *simple* calls for now.
1456   MVT RetVT;
1457   if (CLI.RetTy->isVoidTy())
1458     RetVT = MVT::isVoid;
1459   else if (!isTypeLegal(CLI.RetTy, RetVT))
1460     return false;
1461
1462   for (auto Flag : CLI.OutFlags)
1463     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
1464       return false;
1465
1466   // Set up the argument vectors.
1467   SmallVector<MVT, 16> OutVTs;
1468   OutVTs.reserve(CLI.OutVals.size());
1469
1470   for (auto *Val : CLI.OutVals) {
1471     MVT VT;
1472     if (!isTypeLegal(Val->getType(), VT) &&
1473         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
1474       return false;
1475
1476     // We don't handle vector parameters yet.
1477     if (VT.isVector() || VT.getSizeInBits() > 64)
1478       return false;
1479
1480     OutVTs.push_back(VT);
1481   }
1482
1483   Address Addr;
1484   if (!ComputeCallAddress(Callee, Addr))
1485     return false;
1486
1487   // Handle the arguments now that we've gotten them.
1488   unsigned NumBytes;
1489   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
1490     return false;
1491
1492   // Issue the call.
1493   MachineInstrBuilder MIB;
1494   if (CM == CodeModel::Small) {
1495     unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
1496     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
1497     if (SymName)
1498       MIB.addExternalSymbol(SymName, 0);
1499     else if (Addr.getGlobalValue())
1500       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
1501     else if (Addr.getReg())
1502       MIB.addReg(Addr.getReg());
1503     else
1504       return false;
1505   } else {
1506     unsigned CallReg = 0;
1507     if (SymName) {
1508       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
1509       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
1510               ADRPReg)
1511         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
1512
1513       CallReg = createResultReg(&AArch64::GPR64RegClass);
1514       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
1515               CallReg)
1516         .addReg(ADRPReg)
1517         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
1518                            AArch64II::MO_NC);
1519     } else if (Addr.getGlobalValue()) {
1520       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
1521     } else if (Addr.getReg())
1522       CallReg = Addr.getReg();
1523
1524     if (!CallReg)
1525       return false;
1526
1527     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1528                   TII.get(AArch64::BLR)).addReg(CallReg);
1529   }
1530
1531   // Add implicit physical register uses to the call.
1532   for (auto Reg : CLI.OutRegs)
1533     MIB.addReg(Reg, RegState::Implicit);
1534
1535   // Add a register mask with the call-preserved registers.
1536   // Proper defs for return values will be added by setPhysRegsDeadExcept().
1537   MIB.addRegMask(TRI.getCallPreservedMask(CC));
1538
1539   CLI.Call = MIB;
1540
1541   // Finish off the call including any return values.
1542   return FinishCall(CLI, RetVT, NumBytes);
1543 }
1544
1545 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1546   if (Alignment)
1547     return Len / Alignment <= 4;
1548   else
1549     return Len < 32;
1550 }
1551
1552 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1553                                          uint64_t Len, unsigned Alignment) {
1554   // Make sure we don't bloat code by inlining very large memcpy's.
1555   if (!IsMemCpySmall(Len, Alignment))
1556     return false;
1557
1558   int64_t UnscaledOffset = 0;
1559   Address OrigDest = Dest;
1560   Address OrigSrc = Src;
1561
1562   while (Len) {
1563     MVT VT;
1564     if (!Alignment || Alignment >= 8) {
1565       if (Len >= 8)
1566         VT = MVT::i64;
1567       else if (Len >= 4)
1568         VT = MVT::i32;
1569       else if (Len >= 2)
1570         VT = MVT::i16;
1571       else {
1572         VT = MVT::i8;
1573       }
1574     } else {
1575       // Bound based on alignment.
1576       if (Len >= 4 && Alignment == 4)
1577         VT = MVT::i32;
1578       else if (Len >= 2 && Alignment == 2)
1579         VT = MVT::i16;
1580       else {
1581         VT = MVT::i8;
1582       }
1583     }
1584
1585     bool RV;
1586     unsigned ResultReg;
1587     RV = EmitLoad(VT, ResultReg, Src);
1588     if (!RV)
1589       return false;
1590
1591     RV = EmitStore(VT, ResultReg, Dest);
1592     if (!RV)
1593       return false;
1594
1595     int64_t Size = VT.getSizeInBits() / 8;
1596     Len -= Size;
1597     UnscaledOffset += Size;
1598
1599     // We need to recompute the unscaled offset for each iteration.
1600     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1601     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1602   }
1603
1604   return true;
1605 }
1606
1607 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1608 /// into the user. The condition code will only be updated on success.
1609 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1610                                         const Instruction *I,
1611                                         const Value *Cond) {
1612   if (!isa<ExtractValueInst>(Cond))
1613     return false;
1614
1615   const auto *EV = cast<ExtractValueInst>(Cond);
1616   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1617     return false;
1618
1619   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1620   MVT RetVT;
1621   const Function *Callee = II->getCalledFunction();
1622   Type *RetTy =
1623   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1624   if (!isTypeLegal(RetTy, RetVT))
1625     return false;
1626
1627   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1628     return false;
1629
1630   AArch64CC::CondCode TmpCC;
1631   switch (II->getIntrinsicID()) {
1632     default: return false;
1633     case Intrinsic::sadd_with_overflow:
1634     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1635     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1636     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1637     case Intrinsic::smul_with_overflow:
1638     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1639   }
1640
1641   // Check if both instructions are in the same basic block.
1642   if (II->getParent() != I->getParent())
1643     return false;
1644
1645   // Make sure nothing is in the way
1646   BasicBlock::const_iterator Start = I;
1647   BasicBlock::const_iterator End = II;
1648   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1649     // We only expect extractvalue instructions between the intrinsic and the
1650     // instruction to be selected.
1651     if (!isa<ExtractValueInst>(Itr))
1652       return false;
1653
1654     // Check that the extractvalue operand comes from the intrinsic.
1655     const auto *EVI = cast<ExtractValueInst>(Itr);
1656     if (EVI->getAggregateOperand() != II)
1657       return false;
1658   }
1659
1660   CC = TmpCC;
1661   return true;
1662 }
1663
1664 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1665   // FIXME: Handle more intrinsics.
1666   switch (II->getIntrinsicID()) {
1667   default: return false;
1668   case Intrinsic::frameaddress: {
1669     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1670     MFI->setFrameAddressIsTaken(true);
1671
1672     const AArch64RegisterInfo *RegInfo =
1673       static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
1674     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1675     unsigned SrcReg = FramePtr;
1676
1677     // Recursively load frame address
1678     // ldr x0, [fp]
1679     // ldr x0, [x0]
1680     // ldr x0, [x0]
1681     // ...
1682     unsigned DestReg;
1683     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1684     while (Depth--) {
1685       DestReg = createResultReg(&AArch64::GPR64RegClass);
1686       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1687               TII.get(AArch64::LDRXui), DestReg)
1688         .addReg(SrcReg).addImm(0);
1689       SrcReg = DestReg;
1690     }
1691
1692     UpdateValueMap(II, SrcReg);
1693     return true;
1694   }
1695   case Intrinsic::memcpy:
1696   case Intrinsic::memmove: {
1697     const auto *MTI = cast<MemTransferInst>(II);
1698     // Don't handle volatile.
1699     if (MTI->isVolatile())
1700       return false;
1701
1702     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
1703     // we would emit dead code because we don't currently handle memmoves.
1704     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1705     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
1706       // Small memcpy's are common enough that we want to do them without a call
1707       // if possible.
1708       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1709       unsigned Alignment = MTI->getAlignment();
1710       if (IsMemCpySmall(Len, Alignment)) {
1711         Address Dest, Src;
1712         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1713             !ComputeAddress(MTI->getRawSource(), Src))
1714           return false;
1715         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1716           return true;
1717       }
1718     }
1719
1720     if (!MTI->getLength()->getType()->isIntegerTy(64))
1721       return false;
1722
1723     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1724       // Fast instruction selection doesn't support the special
1725       // address spaces.
1726       return false;
1727
1728     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1729     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1730   }
1731   case Intrinsic::memset: {
1732     const MemSetInst *MSI = cast<MemSetInst>(II);
1733     // Don't handle volatile.
1734     if (MSI->isVolatile())
1735       return false;
1736
1737     if (!MSI->getLength()->getType()->isIntegerTy(64))
1738       return false;
1739
1740     if (MSI->getDestAddressSpace() > 255)
1741       // Fast instruction selection doesn't support the special
1742       // address spaces.
1743       return false;
1744
1745     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
1746   }
1747   case Intrinsic::trap: {
1748     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
1749         .addImm(1);
1750     return true;
1751   }
1752   case Intrinsic::sqrt: {
1753     Type *RetTy = II->getCalledFunction()->getReturnType();
1754
1755     MVT VT;
1756     if (!isTypeLegal(RetTy, VT))
1757       return false;
1758
1759     unsigned Op0Reg = getRegForValue(II->getOperand(0));
1760     if (!Op0Reg)
1761       return false;
1762     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
1763
1764     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
1765     if (!ResultReg)
1766       return false;
1767
1768     UpdateValueMap(II, ResultReg);
1769     return true;
1770   }
1771   case Intrinsic::sadd_with_overflow:
1772   case Intrinsic::uadd_with_overflow:
1773   case Intrinsic::ssub_with_overflow:
1774   case Intrinsic::usub_with_overflow:
1775   case Intrinsic::smul_with_overflow:
1776   case Intrinsic::umul_with_overflow: {
1777     // This implements the basic lowering of the xalu with overflow intrinsics.
1778     const Function *Callee = II->getCalledFunction();
1779     auto *Ty = cast<StructType>(Callee->getReturnType());
1780     Type *RetTy = Ty->getTypeAtIndex(0U);
1781     Type *CondTy = Ty->getTypeAtIndex(1);
1782
1783     MVT VT;
1784     if (!isTypeLegal(RetTy, VT))
1785       return false;
1786
1787     if (VT != MVT::i32 && VT != MVT::i64)
1788       return false;
1789
1790     const Value *LHS = II->getArgOperand(0);
1791     const Value *RHS = II->getArgOperand(1);
1792     // Canonicalize immediate to the RHS.
1793     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
1794         isCommutativeIntrinsic(II))
1795       std::swap(LHS, RHS);
1796
1797     unsigned LHSReg = getRegForValue(LHS);
1798     if (!LHSReg)
1799       return false;
1800     bool LHSIsKill = hasTrivialKill(LHS);
1801
1802     // Check if the immediate can be encoded in the instruction and if we should
1803     // invert the instruction (adds -> subs) to handle negative immediates.
1804     bool UseImm = false;
1805     bool UseInverse = false;
1806     uint64_t Imm = 0;
1807     if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1808       if (C->isNegative()) {
1809         UseInverse = true;
1810         Imm = -(C->getSExtValue());
1811       } else
1812         Imm = C->getZExtValue();
1813
1814       if (isUInt<12>(Imm))
1815         UseImm = true;
1816
1817       UseInverse = UseImm && UseInverse;
1818     }
1819
1820     static const unsigned OpcTable[2][2][2] = {
1821       { {AArch64::ADDSWrr, AArch64::ADDSXrr},
1822         {AArch64::ADDSWri, AArch64::ADDSXri} },
1823       { {AArch64::SUBSWrr, AArch64::SUBSXrr},
1824         {AArch64::SUBSWri, AArch64::SUBSXri} }
1825     };
1826     unsigned Opc = 0;
1827     unsigned MulReg = 0;
1828     unsigned RHSReg = 0;
1829     bool RHSIsKill = false;
1830     AArch64CC::CondCode CC = AArch64CC::Invalid;
1831     bool Is64Bit = VT == MVT::i64;
1832     switch (II->getIntrinsicID()) {
1833     default: llvm_unreachable("Unexpected intrinsic!");
1834     case Intrinsic::sadd_with_overflow:
1835       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1836     case Intrinsic::uadd_with_overflow:
1837       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
1838     case Intrinsic::ssub_with_overflow:
1839       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1840     case Intrinsic::usub_with_overflow:
1841       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
1842     case Intrinsic::smul_with_overflow: {
1843       CC = AArch64CC::NE;
1844       RHSReg = getRegForValue(RHS);
1845       if (!RHSReg)
1846         return false;
1847       RHSIsKill = hasTrivialKill(RHS);
1848
1849       if (VT == MVT::i32) {
1850         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1851         unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
1852         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1853                                             AArch64::sub_32);
1854         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
1855                                               AArch64::sub_32);
1856         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1857         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1858                 TII.get(AArch64::SUBSWrs), CmpReg)
1859           .addReg(ShiftReg, getKillRegState(true))
1860           .addReg(MulReg, getKillRegState(false))
1861           .addImm(159); // 159 <-> asr #31
1862       } else {
1863         assert(VT == MVT::i64 && "Unexpected value type.");
1864         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1865         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
1866                                         RHSReg, RHSIsKill);
1867         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1868         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1869                 TII.get(AArch64::SUBSXrs), CmpReg)
1870           .addReg(SMULHReg, getKillRegState(true))
1871           .addReg(MulReg, getKillRegState(false))
1872           .addImm(191); // 191 <-> asr #63
1873       }
1874       break;
1875     }
1876     case Intrinsic::umul_with_overflow: {
1877       CC = AArch64CC::NE;
1878       RHSReg = getRegForValue(RHS);
1879       if (!RHSReg)
1880         return false;
1881       RHSIsKill = hasTrivialKill(RHS);
1882
1883       if (VT == MVT::i32) {
1884         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1885         unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
1886         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1887                 TII.get(AArch64::SUBSXrs), CmpReg)
1888           .addReg(AArch64::XZR, getKillRegState(true))
1889           .addReg(MulReg, getKillRegState(false))
1890           .addImm(96); // 96 <-> lsr #32
1891         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1892                                             AArch64::sub_32);
1893       } else {
1894         assert(VT == MVT::i64 && "Unexpected value type.");
1895         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1896         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
1897                                         RHSReg, RHSIsKill);
1898         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1899         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1900                 TII.get(AArch64::SUBSXrr), CmpReg)
1901         .addReg(AArch64::XZR, getKillRegState(true))
1902         .addReg(UMULHReg, getKillRegState(false));
1903       }
1904       break;
1905     }
1906     }
1907
1908     if (!UseImm) {
1909       RHSReg = getRegForValue(RHS);
1910       if (!RHSReg)
1911         return false;
1912       RHSIsKill = hasTrivialKill(RHS);
1913     }
1914
1915     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1916     if (Opc) {
1917       MachineInstrBuilder MIB;
1918       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
1919                     ResultReg)
1920               .addReg(LHSReg, getKillRegState(LHSIsKill));
1921       if (UseImm) {
1922         MIB.addImm(Imm);
1923         MIB.addImm(0);
1924       } else
1925         MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
1926     }
1927     else
1928       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1929               TII.get(TargetOpcode::COPY), ResultReg)
1930         .addReg(MulReg);
1931
1932     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
1933     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
1934     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1935             ResultReg2)
1936       .addReg(AArch64::WZR, getKillRegState(true))
1937       .addReg(AArch64::WZR, getKillRegState(true))
1938       .addImm(getInvertedCondCode(CC));
1939
1940     UpdateValueMap(II, ResultReg, 2);
1941     return true;
1942   }
1943   }
1944   return false;
1945 }
1946
1947 bool AArch64FastISel::SelectRet(const Instruction *I) {
1948   const ReturnInst *Ret = cast<ReturnInst>(I);
1949   const Function &F = *I->getParent()->getParent();
1950
1951   if (!FuncInfo.CanLowerReturn)
1952     return false;
1953
1954   if (F.isVarArg())
1955     return false;
1956
1957   // Build a list of return value registers.
1958   SmallVector<unsigned, 4> RetRegs;
1959
1960   if (Ret->getNumOperands() > 0) {
1961     CallingConv::ID CC = F.getCallingConv();
1962     SmallVector<ISD::OutputArg, 4> Outs;
1963     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
1964
1965     // Analyze operands of the call, assigning locations to each operand.
1966     SmallVector<CCValAssign, 16> ValLocs;
1967     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
1968                    I->getContext());
1969     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
1970                                                      : RetCC_AArch64_AAPCS;
1971     CCInfo.AnalyzeReturn(Outs, RetCC);
1972
1973     // Only handle a single return value for now.
1974     if (ValLocs.size() != 1)
1975       return false;
1976
1977     CCValAssign &VA = ValLocs[0];
1978     const Value *RV = Ret->getOperand(0);
1979
1980     // Don't bother handling odd stuff for now.
1981     if (VA.getLocInfo() != CCValAssign::Full)
1982       return false;
1983     // Only handle register returns for now.
1984     if (!VA.isRegLoc())
1985       return false;
1986     unsigned Reg = getRegForValue(RV);
1987     if (Reg == 0)
1988       return false;
1989
1990     unsigned SrcReg = Reg + VA.getValNo();
1991     unsigned DestReg = VA.getLocReg();
1992     // Avoid a cross-class copy. This is very unlikely.
1993     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
1994       return false;
1995
1996     EVT RVEVT = TLI.getValueType(RV->getType());
1997     if (!RVEVT.isSimple())
1998       return false;
1999
2000     // Vectors (of > 1 lane) in big endian need tricky handling.
2001     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2002       return false;
2003
2004     MVT RVVT = RVEVT.getSimpleVT();
2005     if (RVVT == MVT::f128)
2006       return false;
2007     MVT DestVT = VA.getValVT();
2008     // Special handling for extended integers.
2009     if (RVVT != DestVT) {
2010       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2011         return false;
2012
2013       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2014         return false;
2015
2016       bool isZExt = Outs[0].Flags.isZExt();
2017       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2018       if (SrcReg == 0)
2019         return false;
2020     }
2021
2022     // Make the copy.
2023     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2024             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2025
2026     // Add register to return instruction.
2027     RetRegs.push_back(VA.getLocReg());
2028   }
2029
2030   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2031                                     TII.get(AArch64::RET_ReallyLR));
2032   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2033     MIB.addReg(RetRegs[i], RegState::Implicit);
2034   return true;
2035 }
2036
2037 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2038   Type *DestTy = I->getType();
2039   Value *Op = I->getOperand(0);
2040   Type *SrcTy = Op->getType();
2041
2042   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2043   EVT DestEVT = TLI.getValueType(DestTy, true);
2044   if (!SrcEVT.isSimple())
2045     return false;
2046   if (!DestEVT.isSimple())
2047     return false;
2048
2049   MVT SrcVT = SrcEVT.getSimpleVT();
2050   MVT DestVT = DestEVT.getSimpleVT();
2051
2052   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2053       SrcVT != MVT::i8)
2054     return false;
2055   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2056       DestVT != MVT::i1)
2057     return false;
2058
2059   unsigned SrcReg = getRegForValue(Op);
2060   if (!SrcReg)
2061     return false;
2062
2063   // If we're truncating from i64 to a smaller non-legal type then generate an
2064   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2065   // generate any code.
2066   if (SrcVT == MVT::i64) {
2067     uint64_t Mask = 0;
2068     switch (DestVT.SimpleTy) {
2069     default:
2070       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2071       return false;
2072     case MVT::i1:
2073       Mask = 0x1;
2074       break;
2075     case MVT::i8:
2076       Mask = 0xff;
2077       break;
2078     case MVT::i16:
2079       Mask = 0xffff;
2080       break;
2081     }
2082     // Issue an extract_subreg to get the lower 32-bits.
2083     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2084                                                 AArch64::sub_32);
2085     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2086     // Create the AND instruction which performs the actual truncation.
2087     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2088     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2089             ANDReg)
2090         .addReg(Reg32)
2091         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2092     SrcReg = ANDReg;
2093   }
2094
2095   UpdateValueMap(I, SrcReg);
2096   return true;
2097 }
2098
2099 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2100   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2101           DestVT == MVT::i64) &&
2102          "Unexpected value type.");
2103   // Handle i8 and i16 as i32.
2104   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2105     DestVT = MVT::i32;
2106
2107   if (isZExt) {
2108     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2109     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2110     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2111             ResultReg)
2112         .addReg(SrcReg)
2113         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2114
2115     if (DestVT == MVT::i64) {
2116       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2117       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2118       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2119       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2120               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2121           .addImm(0)
2122           .addReg(ResultReg)
2123           .addImm(AArch64::sub_32);
2124       ResultReg = Reg64;
2125     }
2126     return ResultReg;
2127   } else {
2128     if (DestVT == MVT::i64) {
2129       // FIXME: We're SExt i1 to i64.
2130       return 0;
2131     }
2132     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2133     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2134             ResultReg)
2135         .addReg(SrcReg)
2136         .addImm(0)
2137         .addImm(0);
2138     return ResultReg;
2139   }
2140 }
2141
2142 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2143                                       unsigned Op1, bool Op1IsKill) {
2144   unsigned Opc, ZReg;
2145   switch (RetVT.SimpleTy) {
2146   default: return 0;
2147   case MVT::i8:
2148   case MVT::i16:
2149   case MVT::i32:
2150     RetVT = MVT::i32;
2151     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2152   case MVT::i64:
2153     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2154   }
2155
2156   // Create the base instruction, then add the operands.
2157   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2158   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2159     .addReg(Op0, getKillRegState(Op0IsKill))
2160     .addReg(Op1, getKillRegState(Op1IsKill))
2161     .addReg(ZReg, getKillRegState(true));
2162
2163   return ResultReg;
2164 }
2165
2166 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2167                                         unsigned Op1, bool Op1IsKill) {
2168   if (RetVT != MVT::i64)
2169     return 0;
2170
2171   // Create the base instruction, then add the operands.
2172   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2173   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2174           ResultReg)
2175     .addReg(Op0, getKillRegState(Op0IsKill))
2176     .addReg(Op1, getKillRegState(Op1IsKill))
2177     .addReg(AArch64::XZR, getKillRegState(true));
2178
2179   return ResultReg;
2180 }
2181
2182 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2183                                         unsigned Op1, bool Op1IsKill) {
2184   if (RetVT != MVT::i64)
2185     return 0;
2186
2187   // Create the base instruction, then add the operands.
2188   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2189   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2190           ResultReg)
2191     .addReg(Op0, getKillRegState(Op0IsKill))
2192     .addReg(Op1, getKillRegState(Op1IsKill))
2193     .addReg(AArch64::XZR, getKillRegState(true));
2194
2195   return ResultReg;
2196 }
2197
2198 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2199                                       uint64_t Shift) {
2200   unsigned Opc, ImmR, ImmS;
2201   switch (RetVT.SimpleTy) {
2202   default: return 0;
2203   case MVT::i8:
2204   case MVT::i16:
2205   case MVT::i32:
2206     RetVT = MVT::i32;
2207     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2208   case MVT::i64:
2209     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2210   }
2211
2212   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2213                           ImmS);
2214 }
2215
2216 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2217                                       uint64_t Shift) {
2218   unsigned Opc, ImmS;
2219   switch (RetVT.SimpleTy) {
2220   default: return 0;
2221   case MVT::i8:
2222   case MVT::i16:
2223   case MVT::i32:
2224     RetVT = MVT::i32;
2225     Opc = AArch64::UBFMWri; ImmS = 31; break;
2226   case MVT::i64:
2227     Opc = AArch64::UBFMXri; ImmS = 63; break;
2228   }
2229
2230   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2231                           ImmS);
2232 }
2233
2234 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2235                                       uint64_t Shift) {
2236   unsigned Opc, ImmS;
2237   switch (RetVT.SimpleTy) {
2238   default: return 0;
2239   case MVT::i8:
2240   case MVT::i16:
2241   case MVT::i32:
2242     RetVT = MVT::i32;
2243     Opc = AArch64::SBFMWri; ImmS = 31; break;
2244   case MVT::i64:
2245     Opc = AArch64::SBFMXri; ImmS = 63; break;
2246   }
2247
2248   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2249                           ImmS);
2250 }
2251
2252 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2253                                      bool isZExt) {
2254   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2255
2256   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2257   // DestVT are odd things, so test to make sure that they are both types we can
2258   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2259   // bail out to SelectionDAG.
2260   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2261        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2262       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2263        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2264     return 0;
2265
2266   unsigned Opc;
2267   unsigned Imm = 0;
2268
2269   switch (SrcVT.SimpleTy) {
2270   default:
2271     return 0;
2272   case MVT::i1:
2273     return Emiti1Ext(SrcReg, DestVT, isZExt);
2274   case MVT::i8:
2275     if (DestVT == MVT::i64)
2276       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2277     else
2278       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2279     Imm = 7;
2280     break;
2281   case MVT::i16:
2282     if (DestVT == MVT::i64)
2283       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2284     else
2285       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2286     Imm = 15;
2287     break;
2288   case MVT::i32:
2289     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2290     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2291     Imm = 31;
2292     break;
2293   }
2294
2295   // Handle i8 and i16 as i32.
2296   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2297     DestVT = MVT::i32;
2298   else if (DestVT == MVT::i64) {
2299     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2300     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2301             TII.get(AArch64::SUBREG_TO_REG), Src64)
2302         .addImm(0)
2303         .addReg(SrcReg)
2304         .addImm(AArch64::sub_32);
2305     SrcReg = Src64;
2306   }
2307
2308   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2309   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2310       .addReg(SrcReg)
2311       .addImm(0)
2312       .addImm(Imm);
2313
2314   return ResultReg;
2315 }
2316
2317 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2318   // On ARM, in general, integer casts don't involve legal types; this code
2319   // handles promotable integers.  The high bits for a type smaller than
2320   // the register size are assumed to be undefined.
2321   Type *DestTy = I->getType();
2322   Value *Src = I->getOperand(0);
2323   Type *SrcTy = Src->getType();
2324
2325   bool isZExt = isa<ZExtInst>(I);
2326   unsigned SrcReg = getRegForValue(Src);
2327   if (!SrcReg)
2328     return false;
2329
2330   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2331   EVT DestEVT = TLI.getValueType(DestTy, true);
2332   if (!SrcEVT.isSimple())
2333     return false;
2334   if (!DestEVT.isSimple())
2335     return false;
2336
2337   MVT SrcVT = SrcEVT.getSimpleVT();
2338   MVT DestVT = DestEVT.getSimpleVT();
2339   unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2340   if (ResultReg == 0)
2341     return false;
2342   UpdateValueMap(I, ResultReg);
2343   return true;
2344 }
2345
2346 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2347   EVT DestEVT = TLI.getValueType(I->getType(), true);
2348   if (!DestEVT.isSimple())
2349     return false;
2350
2351   MVT DestVT = DestEVT.getSimpleVT();
2352   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2353     return false;
2354
2355   unsigned DivOpc;
2356   bool is64bit = (DestVT == MVT::i64);
2357   switch (ISDOpcode) {
2358   default:
2359     return false;
2360   case ISD::SREM:
2361     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2362     break;
2363   case ISD::UREM:
2364     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2365     break;
2366   }
2367   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2368   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2369   if (!Src0Reg)
2370     return false;
2371
2372   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2373   if (!Src1Reg)
2374     return false;
2375
2376   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2377   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2378       .addReg(Src0Reg)
2379       .addReg(Src1Reg);
2380   // The remainder is computed as numerator - (quotient * denominator) using the
2381   // MSUB instruction.
2382   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2383   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2384       .addReg(QuotReg)
2385       .addReg(Src1Reg)
2386       .addReg(Src0Reg);
2387   UpdateValueMap(I, ResultReg);
2388   return true;
2389 }
2390
2391 bool AArch64FastISel::SelectMul(const Instruction *I) {
2392   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2393   if (!SrcEVT.isSimple())
2394     return false;
2395   MVT SrcVT = SrcEVT.getSimpleVT();
2396
2397   // Must be simple value type.  Don't handle vectors.
2398   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2399       SrcVT != MVT::i8)
2400     return false;
2401
2402   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2403   if (!Src0Reg)
2404     return false;
2405   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2406
2407   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2408   if (!Src1Reg)
2409     return false;
2410   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2411
2412   unsigned ResultReg =
2413     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2414
2415   if (!ResultReg)
2416     return false;
2417
2418   UpdateValueMap(I, ResultReg);
2419   return true;
2420 }
2421
2422 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2423                                   bool IsArithmetic) {
2424   EVT RetEVT = TLI.getValueType(I->getType(), true);
2425   if (!RetEVT.isSimple())
2426     return false;
2427   MVT RetVT = RetEVT.getSimpleVT();
2428
2429   if (!isa<ConstantInt>(I->getOperand(1)))
2430     return false;
2431
2432   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2433   if (!Op0Reg)
2434     return false;
2435   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2436
2437   uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
2438
2439   unsigned ResultReg;
2440   if (IsLeftShift)
2441     ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2442   else {
2443     if (IsArithmetic)
2444       ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2445     else
2446       ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2447   }
2448
2449   if (!ResultReg)
2450     return false;
2451
2452   UpdateValueMap(I, ResultReg);
2453   return true;
2454 }
2455
2456 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2457   MVT RetVT, SrcVT;
2458
2459   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2460     return false;
2461   if (!isTypeLegal(I->getType(), RetVT))
2462     return false;
2463
2464   unsigned Opc;
2465   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2466     Opc = AArch64::FMOVWSr;
2467   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2468     Opc = AArch64::FMOVXDr;
2469   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2470     Opc = AArch64::FMOVSWr;
2471   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2472     Opc = AArch64::FMOVDXr;
2473   else
2474     return false;
2475
2476   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2477   if (!Op0Reg)
2478     return false;
2479   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2480   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2481                                       Op0Reg, Op0IsKill);
2482
2483   if (!ResultReg)
2484     return false;
2485
2486   UpdateValueMap(I, ResultReg);
2487   return true;
2488 }
2489
2490 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2491   switch (I->getOpcode()) {
2492   default:
2493     break;
2494   case Instruction::Load:
2495     return SelectLoad(I);
2496   case Instruction::Store:
2497     return SelectStore(I);
2498   case Instruction::Br:
2499     return SelectBranch(I);
2500   case Instruction::IndirectBr:
2501     return SelectIndirectBr(I);
2502   case Instruction::FCmp:
2503   case Instruction::ICmp:
2504     return SelectCmp(I);
2505   case Instruction::Select:
2506     return SelectSelect(I);
2507   case Instruction::FPExt:
2508     return SelectFPExt(I);
2509   case Instruction::FPTrunc:
2510     return SelectFPTrunc(I);
2511   case Instruction::FPToSI:
2512     return SelectFPToInt(I, /*Signed=*/true);
2513   case Instruction::FPToUI:
2514     return SelectFPToInt(I, /*Signed=*/false);
2515   case Instruction::SIToFP:
2516     return SelectIntToFP(I, /*Signed=*/true);
2517   case Instruction::UIToFP:
2518     return SelectIntToFP(I, /*Signed=*/false);
2519   case Instruction::SRem:
2520     return SelectRem(I, ISD::SREM);
2521   case Instruction::URem:
2522     return SelectRem(I, ISD::UREM);
2523   case Instruction::Ret:
2524     return SelectRet(I);
2525   case Instruction::Trunc:
2526     return SelectTrunc(I);
2527   case Instruction::ZExt:
2528   case Instruction::SExt:
2529     return SelectIntExt(I);
2530
2531   // FIXME: All of these should really be handled by the target-independent
2532   // selector -> improve FastISel tblgen.
2533   case Instruction::Mul:
2534     return SelectMul(I);
2535   case Instruction::Shl:
2536       return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2537   case Instruction::LShr:
2538     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2539   case Instruction::AShr:
2540     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2541   case Instruction::BitCast:
2542     return SelectBitCast(I);
2543   }
2544   return false;
2545   // Silence warnings.
2546   (void)&CC_AArch64_DarwinPCS_VarArg;
2547 }
2548
2549 namespace llvm {
2550 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2551                                         const TargetLibraryInfo *libInfo) {
2552   return new AArch64FastISel(funcInfo, libInfo);
2553 }
2554 }