lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44
  45   class Address {
  46   public:
  47     typedef enum {
  48       RegBase,
  49       FrameIndexBase
  50     } BaseKind;
  51
  52   private:
  53     BaseKind Kind;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     int64_t Offset;
  59     const GlobalValue *GV;
  60
  61   public:
  62     Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
  63     void setKind(BaseKind K) { Kind = K; }
  64     BaseKind getKind() const { return Kind; }
  65     bool isRegBase() const { return Kind == RegBase; }
  66     bool isFIBase() const { return Kind == FrameIndexBase; }
  67     void setReg(unsigned Reg) {
  68       assert(isRegBase() && "Invalid base register access!");
  69       Base.Reg = Reg;
  70     }
  71     unsigned getReg() const {
  72       assert(isRegBase() && "Invalid base register access!");
  73       return Base.Reg;
  74     }
  75     void setFI(unsigned FI) {
  76       assert(isFIBase() && "Invalid base frame index  access!");
  77       Base.FI = FI;
  78     }
  79     unsigned getFI() const {
  80       assert(isFIBase() && "Invalid base frame index access!");
  81       return Base.FI;
  82     }
  83     void setOffset(int64_t O) { Offset = O; }
  84     int64_t getOffset() { return Offset; }
  85
  86     void setGlobalValue(const GlobalValue *G) { GV = G; }
  87     const GlobalValue *getGlobalValue() { return GV; }
  88
  89     bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
  90   };
  91
  92   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  93   /// make the right decision when generating code for different targets.
  94   const AArch64Subtarget *Subtarget;
  95   LLVMContext *Context;
  96
  97   bool FastLowerCall(CallLoweringInfo &CLI) override;
  98   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
  99
 100 private:
 101   // Selection routines.
 102   bool SelectLoad(const Instruction *I);
 103   bool SelectStore(const Instruction *I);
 104   bool SelectBranch(const Instruction *I);
 105   bool SelectIndirectBr(const Instruction *I);
 106   bool SelectCmp(const Instruction *I);
 107   bool SelectSelect(const Instruction *I);
 108   bool SelectFPExt(const Instruction *I);
 109   bool SelectFPTrunc(const Instruction *I);
 110   bool SelectFPToInt(const Instruction *I, bool Signed);
 111   bool SelectIntToFP(const Instruction *I, bool Signed);
 112   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 113   bool SelectRet(const Instruction *I);
 114   bool SelectTrunc(const Instruction *I);
 115   bool SelectIntExt(const Instruction *I);
 116   bool SelectMul(const Instruction *I);
 117   bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
 118   bool SelectBitCast(const Instruction *I);
 119
 120   // Utility helper routines.
 121   bool isTypeLegal(Type *Ty, MVT &VT);
 122   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 123   bool ComputeAddress(const Value *Obj, Address &Addr);
 124   bool ComputeCallAddress(const Value *V, Address &Addr);
 125   bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
 126                        bool UseUnscaled);
 127   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 128                             unsigned Flags, bool UseUnscaled);
 129   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 130   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 131                           unsigned Alignment);
 132   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 133                          const Value *Cond);
 134
 135   // Emit functions.
 136   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
 137   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 138                 bool UseUnscaled = false);
 139   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 140                  bool UseUnscaled = false);
 141   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 142   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 143   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 144                        unsigned Op1, bool Op1IsKill);
 145   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 146                          unsigned Op1, bool Op1IsKill);
 147   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 148                          unsigned Op1, bool Op1IsKill);
 149   unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 150   unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 151   unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 152
 153   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 154   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 155
 156   // Call handling routines.
 157 private:
 158   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 159   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 160                        unsigned &NumBytes);
 161   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 162
 163 public:
 164   // Backend specific FastISel code.
 165   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 166   unsigned TargetMaterializeConstant(const Constant *C) override;
 167
 168   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 169                          const TargetLibraryInfo *libInfo)
 170       : FastISel(funcInfo, libInfo) {
 171     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 172     Context = &funcInfo.Fn->getContext();
 173   }
 174
 175   bool TargetSelectInstruction(const Instruction *I) override;
 176
 177 #include "AArch64GenFastISel.inc"
 178 };
 179
 180 } // end anonymous namespace
 181
 182 #include "AArch64GenCallingConv.inc"
 183
 184 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 185   if (CC == CallingConv::WebKit_JS)
 186     return CC_AArch64_WebKit_JS;
 187   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 188 }
 189
 190 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 191   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 192          "Alloca should always return a pointer.");
 193
 194   // Don't handle dynamic allocas.
 195   if (!FuncInfo.StaticAllocaMap.count(AI))
 196     return 0;
 197
 198   DenseMap<const AllocaInst *, int>::iterator SI =
 199       FuncInfo.StaticAllocaMap.find(AI);
 200
 201   if (SI != FuncInfo.StaticAllocaMap.end()) {
 202     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 203     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 204             ResultReg)
 205         .addFrameIndex(SI->second)
 206         .addImm(0)
 207         .addImm(0);
 208     return ResultReg;
 209   }
 210
 211   return 0;
 212 }
 213
 214 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 215   if (VT != MVT::f32 && VT != MVT::f64)
 216     return 0;
 217
 218   const APFloat Val = CFP->getValueAPF();
 219   bool is64bit = (VT == MVT::f64);
 220
 221   // This checks to see if we can use FMOV instructions to materialize
 222   // a constant, otherwise we have to materialize via the constant pool.
 223   if (TLI.isFPImmLegal(Val, VT)) {
 224     int Imm;
 225     unsigned Opc;
 226     if (is64bit) {
 227       Imm = AArch64_AM::getFP64Imm(Val);
 228       Opc = AArch64::FMOVDi;
 229     } else {
 230       Imm = AArch64_AM::getFP32Imm(Val);
 231       Opc = AArch64::FMOVSi;
 232     }
 233     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 234     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 235         .addImm(Imm);
 236     return ResultReg;
 237   }
 238
 239   // Materialize via constant pool.  MachineConstantPool wants an explicit
 240   // alignment.
 241   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 242   if (Align == 0)
 243     Align = DL.getTypeAllocSize(CFP->getType());
 244
 245   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 246   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 247   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 248           ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
 249
 250   unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
 251   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 252   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 253       .addReg(ADRPReg)
 254       .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 255   return ResultReg;
 256 }
 257
 258 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 259   // We can't handle thread-local variables quickly yet.
 260   if (GV->isThreadLocal())
 261     return 0;
 262
 263   // MachO still uses GOT for large code-model accesses, but ELF requires
 264   // movz/movk sequences, which FastISel doesn't handle yet.
 265   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 266     return 0;
 267
 268   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 269
 270   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 271   if (!DestEVT.isSimple())
 272     return 0;
 273
 274   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 275   unsigned ResultReg;
 276
 277   if (OpFlags & AArch64II::MO_GOT) {
 278     // ADRP + LDRX
 279     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 280             ADRPReg)
 281         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 282
 283     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 284     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 285             ResultReg)
 286         .addReg(ADRPReg)
 287         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 288                           AArch64II::MO_NC);
 289   } else {
 290     // ADRP + ADDX
 291     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 292             ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 293
 294     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 295     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 296             ResultReg)
 297         .addReg(ADRPReg)
 298         .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 299         .addImm(0);
 300   }
 301   return ResultReg;
 302 }
 303
 304 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 305   EVT CEVT = TLI.getValueType(C->getType(), true);
 306
 307   // Only handle simple types.
 308   if (!CEVT.isSimple())
 309     return 0;
 310   MVT VT = CEVT.getSimpleVT();
 311
 312   // FIXME: Handle ConstantInt.
 313   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 314     return AArch64MaterializeFP(CFP, VT);
 315   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 316     return AArch64MaterializeGV(GV);
 317
 318   return 0;
 319 }
 320
 321 // Computes the address to get to an object.
 322 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
 323   const User *U = nullptr;
 324   unsigned Opcode = Instruction::UserOp1;
 325   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 326     // Don't walk into other basic blocks unless the object is an alloca from
 327     // another block, otherwise it may not have a virtual register assigned.
 328     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 329         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 330       Opcode = I->getOpcode();
 331       U = I;
 332     }
 333   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 334     Opcode = C->getOpcode();
 335     U = C;
 336   }
 337
 338   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 339     if (Ty->getAddressSpace() > 255)
 340       // Fast instruction selection doesn't support the special
 341       // address spaces.
 342       return false;
 343
 344   switch (Opcode) {
 345   default:
 346     break;
 347   case Instruction::BitCast: {
 348     // Look through bitcasts.
 349     return ComputeAddress(U->getOperand(0), Addr);
 350   }
 351   case Instruction::IntToPtr: {
 352     // Look past no-op inttoptrs.
 353     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 354       return ComputeAddress(U->getOperand(0), Addr);
 355     break;
 356   }
 357   case Instruction::PtrToInt: {
 358     // Look past no-op ptrtoints.
 359     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 360       return ComputeAddress(U->getOperand(0), Addr);
 361     break;
 362   }
 363   case Instruction::GetElementPtr: {
 364     Address SavedAddr = Addr;
 365     uint64_t TmpOffset = Addr.getOffset();
 366
 367     // Iterate through the GEP folding the constants into offsets where
 368     // we can.
 369     gep_type_iterator GTI = gep_type_begin(U);
 370     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 371          ++i, ++GTI) {
 372       const Value *Op = *i;
 373       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 374         const StructLayout *SL = DL.getStructLayout(STy);
 375         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 376         TmpOffset += SL->getElementOffset(Idx);
 377       } else {
 378         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 379         for (;;) {
 380           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 381             // Constant-offset addressing.
 382             TmpOffset += CI->getSExtValue() * S;
 383             break;
 384           }
 385           if (canFoldAddIntoGEP(U, Op)) {
 386             // A compatible add with a constant operand. Fold the constant.
 387             ConstantInt *CI =
 388                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 389             TmpOffset += CI->getSExtValue() * S;
 390             // Iterate on the other operand.
 391             Op = cast<AddOperator>(Op)->getOperand(0);
 392             continue;
 393           }
 394           // Unsupported
 395           goto unsupported_gep;
 396         }
 397       }
 398     }
 399
 400     // Try to grab the base operand now.
 401     Addr.setOffset(TmpOffset);
 402     if (ComputeAddress(U->getOperand(0), Addr))
 403       return true;
 404
 405     // We failed, restore everything and try the other options.
 406     Addr = SavedAddr;
 407
 408   unsupported_gep:
 409     break;
 410   }
 411   case Instruction::Alloca: {
 412     const AllocaInst *AI = cast<AllocaInst>(Obj);
 413     DenseMap<const AllocaInst *, int>::iterator SI =
 414         FuncInfo.StaticAllocaMap.find(AI);
 415     if (SI != FuncInfo.StaticAllocaMap.end()) {
 416       Addr.setKind(Address::FrameIndexBase);
 417       Addr.setFI(SI->second);
 418       return true;
 419     }
 420     break;
 421   }
 422   }
 423
 424   // Try to get this in a register if nothing else has worked.
 425   if (!Addr.isValid())
 426     Addr.setReg(getRegForValue(Obj));
 427   return Addr.isValid();
 428 }
 429
 430 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 431   const User *U = nullptr;
 432   unsigned Opcode = Instruction::UserOp1;
 433   bool InMBB = true;
 434
 435   if (const auto *I = dyn_cast<Instruction>(V)) {
 436     Opcode = I->getOpcode();
 437     U = I;
 438     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 439   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 440     Opcode = C->getOpcode();
 441     U = C;
 442   }
 443
 444   switch (Opcode) {
 445   default: break;
 446   case Instruction::BitCast:
 447     // Look past bitcasts if its operand is in the same BB.
 448     if (InMBB)
 449       return ComputeCallAddress(U->getOperand(0), Addr);
 450     break;
 451   case Instruction::IntToPtr:
 452     // Look past no-op inttoptrs if its operand is in the same BB.
 453     if (InMBB &&
 454         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 455       return ComputeCallAddress(U->getOperand(0), Addr);
 456     break;
 457   case Instruction::PtrToInt:
 458     // Look past no-op ptrtoints if its operand is in the same BB.
 459     if (InMBB &&
 460         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 461       return ComputeCallAddress(U->getOperand(0), Addr);
 462     break;
 463   }
 464
 465   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 466     Addr.setGlobalValue(GV);
 467     return true;
 468   }
 469
 470   // If all else fails, try to materialize the value in a register.
 471   if (!Addr.getGlobalValue()) {
 472     Addr.setReg(getRegForValue(V));
 473     return Addr.getReg() != 0;
 474   }
 475
 476   return false;
 477 }
 478
 479
 480 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 481   EVT evt = TLI.getValueType(Ty, true);
 482
 483   // Only handle simple types.
 484   if (evt == MVT::Other || !evt.isSimple())
 485     return false;
 486   VT = evt.getSimpleVT();
 487
 488   // This is a legal type, but it's not something we handle in fast-isel.
 489   if (VT == MVT::f128)
 490     return false;
 491
 492   // Handle all other legal types, i.e. a register that will directly hold this
 493   // value.
 494   return TLI.isTypeLegal(VT);
 495 }
 496
 497 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 498   if (isTypeLegal(Ty, VT))
 499     return true;
 500
 501   // If this is a type than can be sign or zero-extended to a basic operation
 502   // go ahead and accept it now. For stores, this reflects truncation.
 503   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 504     return true;
 505
 506   return false;
 507 }
 508
 509 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
 510                                       int64_t ScaleFactor, bool UseUnscaled) {
 511   bool needsLowering = false;
 512   int64_t Offset = Addr.getOffset();
 513   switch (VT.SimpleTy) {
 514   default:
 515     return false;
 516   case MVT::i1:
 517   case MVT::i8:
 518   case MVT::i16:
 519   case MVT::i32:
 520   case MVT::i64:
 521   case MVT::f32:
 522   case MVT::f64:
 523     if (!UseUnscaled)
 524       // Using scaled, 12-bit, unsigned immediate offsets.
 525       needsLowering = ((Offset & 0xfff) != Offset);
 526     else
 527       // Using unscaled, 9-bit, signed immediate offsets.
 528       needsLowering = (Offset > 256 || Offset < -256);
 529     break;
 530   }
 531
 532   //If this is a stack pointer and the offset needs to be simplified then put
 533   // the alloca address into a register, set the base type back to register and
 534   // continue. This should almost never happen.
 535   if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
 536     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 537     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 538             ResultReg)
 539         .addFrameIndex(Addr.getFI())
 540         .addImm(0)
 541         .addImm(0);
 542     Addr.setKind(Address::RegBase);
 543     Addr.setReg(ResultReg);
 544   }
 545
 546   // Since the offset is too large for the load/store instruction get the
 547   // reg+offset into a register.
 548   if (needsLowering) {
 549     uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
 550     unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
 551                                       UnscaledOffset, MVT::i64);
 552     if (ResultReg == 0)
 553       return false;
 554     Addr.setReg(ResultReg);
 555     Addr.setOffset(0);
 556   }
 557   return true;
 558 }
 559
 560 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 561                                            const MachineInstrBuilder &MIB,
 562                                            unsigned Flags, bool UseUnscaled) {
 563   int64_t Offset = Addr.getOffset();
 564   // Frame base works a bit differently. Handle it separately.
 565   if (Addr.getKind() == Address::FrameIndexBase) {
 566     int FI = Addr.getFI();
 567     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 568     // and alignment should be based on the VT.
 569     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
 570         MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 571         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 572     // Now add the rest of the operands.
 573     MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 574   } else {
 575     // Now add the rest of the operands.
 576     MIB.addReg(Addr.getReg());
 577     MIB.addImm(Offset);
 578   }
 579 }
 580
 581 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 582                                bool UseUnscaled) {
 583   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 584   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 585   if (!UseUnscaled && Addr.getOffset() < 0)
 586     UseUnscaled = true;
 587
 588   unsigned Opc;
 589   const TargetRegisterClass *RC;
 590   bool VTIsi1 = false;
 591   int64_t ScaleFactor = 0;
 592   switch (VT.SimpleTy) {
 593   default:
 594     return false;
 595   case MVT::i1:
 596     VTIsi1 = true;
 597   // Intentional fall-through.
 598   case MVT::i8:
 599     Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
 600     RC = &AArch64::GPR32RegClass;
 601     ScaleFactor = 1;
 602     break;
 603   case MVT::i16:
 604     Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
 605     RC = &AArch64::GPR32RegClass;
 606     ScaleFactor = 2;
 607     break;
 608   case MVT::i32:
 609     Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
 610     RC = &AArch64::GPR32RegClass;
 611     ScaleFactor = 4;
 612     break;
 613   case MVT::i64:
 614     Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
 615     RC = &AArch64::GPR64RegClass;
 616     ScaleFactor = 8;
 617     break;
 618   case MVT::f32:
 619     Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
 620     RC = TLI.getRegClassFor(VT);
 621     ScaleFactor = 4;
 622     break;
 623   case MVT::f64:
 624     Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
 625     RC = TLI.getRegClassFor(VT);
 626     ScaleFactor = 8;
 627     break;
 628   }
 629   // Scale the offset.
 630   if (!UseUnscaled) {
 631     int64_t Offset = Addr.getOffset();
 632     if (Offset & (ScaleFactor - 1))
 633       // Retry using an unscaled, 9-bit, signed immediate offset.
 634       return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
 635
 636     Addr.setOffset(Offset / ScaleFactor);
 637   }
 638
 639   // Simplify this down to something we can handle.
 640   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 641     return false;
 642
 643   // Create the base instruction, then add the operands.
 644   ResultReg = createResultReg(RC);
 645   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 646                                     TII.get(Opc), ResultReg);
 647   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
 648
 649   // Loading an i1 requires special handling.
 650   if (VTIsi1) {
 651     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
 652     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 653     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 654             ANDReg)
 655         .addReg(ResultReg)
 656         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 657     ResultReg = ANDReg;
 658   }
 659   return true;
 660 }
 661
 662 bool AArch64FastISel::SelectLoad(const Instruction *I) {
 663   MVT VT;
 664   // Verify we have a legal type before going any further.  Currently, we handle
 665   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 666   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 667   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
 668     return false;
 669
 670   // See if we can handle this address.
 671   Address Addr;
 672   if (!ComputeAddress(I->getOperand(0), Addr))
 673     return false;
 674
 675   unsigned ResultReg;
 676   if (!EmitLoad(VT, ResultReg, Addr))
 677     return false;
 678
 679   UpdateValueMap(I, ResultReg);
 680   return true;
 681 }
 682
 683 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 684                                 bool UseUnscaled) {
 685   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 686   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 687   if (!UseUnscaled && Addr.getOffset() < 0)
 688     UseUnscaled = true;
 689
 690   unsigned StrOpc;
 691   bool VTIsi1 = false;
 692   int64_t ScaleFactor = 0;
 693   // Using scaled, 12-bit, unsigned immediate offsets.
 694   switch (VT.SimpleTy) {
 695   default:
 696     return false;
 697   case MVT::i1:
 698     VTIsi1 = true;
 699   case MVT::i8:
 700     StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
 701     ScaleFactor = 1;
 702     break;
 703   case MVT::i16:
 704     StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
 705     ScaleFactor = 2;
 706     break;
 707   case MVT::i32:
 708     StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
 709     ScaleFactor = 4;
 710     break;
 711   case MVT::i64:
 712     StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
 713     ScaleFactor = 8;
 714     break;
 715   case MVT::f32:
 716     StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
 717     ScaleFactor = 4;
 718     break;
 719   case MVT::f64:
 720     StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
 721     ScaleFactor = 8;
 722     break;
 723   }
 724   // Scale the offset.
 725   if (!UseUnscaled) {
 726     int64_t Offset = Addr.getOffset();
 727     if (Offset & (ScaleFactor - 1))
 728       // Retry using an unscaled, 9-bit, signed immediate offset.
 729       return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
 730
 731     Addr.setOffset(Offset / ScaleFactor);
 732   }
 733
 734   // Simplify this down to something we can handle.
 735   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 736     return false;
 737
 738   // Storing an i1 requires special handling.
 739   if (VTIsi1) {
 740     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
 741     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 742     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 743             ANDReg)
 744         .addReg(SrcReg)
 745         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 746     SrcReg = ANDReg;
 747   }
 748   // Create the base instruction, then add the operands.
 749   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 750                                     TII.get(StrOpc)).addReg(SrcReg);
 751   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
 752   return true;
 753 }
 754
 755 bool AArch64FastISel::SelectStore(const Instruction *I) {
 756   MVT VT;
 757   Value *Op0 = I->getOperand(0);
 758   // Verify we have a legal type before going any further.  Currently, we handle
 759   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 760   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 761   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
 762       cast<StoreInst>(I)->isAtomic())
 763     return false;
 764
 765   // Get the value to be stored into a register.
 766   unsigned SrcReg = getRegForValue(Op0);
 767   if (SrcReg == 0)
 768     return false;
 769
 770   // See if we can handle this address.
 771   Address Addr;
 772   if (!ComputeAddress(I->getOperand(1), Addr))
 773     return false;
 774
 775   if (!EmitStore(VT, SrcReg, Addr))
 776     return false;
 777   return true;
 778 }
 779
 780 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 781   switch (Pred) {
 782   case CmpInst::FCMP_ONE:
 783   case CmpInst::FCMP_UEQ:
 784   default:
 785     // AL is our "false" for now. The other two need more compares.
 786     return AArch64CC::AL;
 787   case CmpInst::ICMP_EQ:
 788   case CmpInst::FCMP_OEQ:
 789     return AArch64CC::EQ;
 790   case CmpInst::ICMP_SGT:
 791   case CmpInst::FCMP_OGT:
 792     return AArch64CC::GT;
 793   case CmpInst::ICMP_SGE:
 794   case CmpInst::FCMP_OGE:
 795     return AArch64CC::GE;
 796   case CmpInst::ICMP_UGT:
 797   case CmpInst::FCMP_UGT:
 798     return AArch64CC::HI;
 799   case CmpInst::FCMP_OLT:
 800     return AArch64CC::MI;
 801   case CmpInst::ICMP_ULE:
 802   case CmpInst::FCMP_OLE:
 803     return AArch64CC::LS;
 804   case CmpInst::FCMP_ORD:
 805     return AArch64CC::VC;
 806   case CmpInst::FCMP_UNO:
 807     return AArch64CC::VS;
 808   case CmpInst::FCMP_UGE:
 809     return AArch64CC::PL;
 810   case CmpInst::ICMP_SLT:
 811   case CmpInst::FCMP_ULT:
 812     return AArch64CC::LT;
 813   case CmpInst::ICMP_SLE:
 814   case CmpInst::FCMP_ULE:
 815     return AArch64CC::LE;
 816   case CmpInst::FCMP_UNE:
 817   case CmpInst::ICMP_NE:
 818     return AArch64CC::NE;
 819   case CmpInst::ICMP_UGE:
 820     return AArch64CC::HS;
 821   case CmpInst::ICMP_ULT:
 822     return AArch64CC::LO;
 823   }
 824 }
 825
 826 bool AArch64FastISel::SelectBranch(const Instruction *I) {
 827   const BranchInst *BI = cast<BranchInst>(I);
 828   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 829   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 830
 831   AArch64CC::CondCode CC = AArch64CC::NE;
 832   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
 833     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
 834       // We may not handle every CC for now.
 835       CC = getCompareCC(CI->getPredicate());
 836       if (CC == AArch64CC::AL)
 837         return false;
 838
 839       // Emit the cmp.
 840       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
 841         return false;
 842
 843       // Emit the branch.
 844       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 845           .addImm(CC)
 846           .addMBB(TBB);
 847
 848       // Obtain the branch weight and add the TrueBB to the successor list.
 849       uint32_t BranchWeight = 0;
 850       if (FuncInfo.BPI)
 851         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 852                                                   TBB->getBasicBlock());
 853       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 854
 855       FastEmitBranch(FBB, DbgLoc);
 856       return true;
 857     }
 858   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
 859     MVT SrcVT;
 860     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
 861         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
 862       unsigned CondReg = getRegForValue(TI->getOperand(0));
 863       if (CondReg == 0)
 864         return false;
 865
 866       // Issue an extract_subreg to get the lower 32-bits.
 867       if (SrcVT == MVT::i64)
 868         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
 869                                              AArch64::sub_32);
 870
 871       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
 872       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 873       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 874               TII.get(AArch64::ANDWri), ANDReg)
 875           .addReg(CondReg)
 876           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 877       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 878               TII.get(AArch64::SUBSWri))
 879           .addReg(ANDReg)
 880           .addReg(ANDReg)
 881           .addImm(0)
 882           .addImm(0);
 883
 884       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 885         std::swap(TBB, FBB);
 886         CC = AArch64CC::EQ;
 887       }
 888       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 889           .addImm(CC)
 890           .addMBB(TBB);
 891
 892       // Obtain the branch weight and add the TrueBB to the successor list.
 893       uint32_t BranchWeight = 0;
 894       if (FuncInfo.BPI)
 895         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 896                                                   TBB->getBasicBlock());
 897       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 898
 899       FastEmitBranch(FBB, DbgLoc);
 900       return true;
 901     }
 902   } else if (const ConstantInt *CI =
 903                  dyn_cast<ConstantInt>(BI->getCondition())) {
 904     uint64_t Imm = CI->getZExtValue();
 905     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
 906     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
 907         .addMBB(Target);
 908
 909     // Obtain the branch weight and add the target to the successor list.
 910     uint32_t BranchWeight = 0;
 911     if (FuncInfo.BPI)
 912       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 913                                                  Target->getBasicBlock());
 914     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
 915     return true;
 916   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
 917     // Fake request the condition, otherwise the intrinsic might be completely
 918     // optimized away.
 919     unsigned CondReg = getRegForValue(BI->getCondition());
 920     if (!CondReg)
 921       return false;
 922
 923     // Emit the branch.
 924     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 925       .addImm(CC)
 926       .addMBB(TBB);
 927
 928     // Obtain the branch weight and add the TrueBB to the successor list.
 929     uint32_t BranchWeight = 0;
 930     if (FuncInfo.BPI)
 931       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 932                                                  TBB->getBasicBlock());
 933     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 934
 935     FastEmitBranch(FBB, DbgLoc);
 936     return true;
 937   }
 938
 939   unsigned CondReg = getRegForValue(BI->getCondition());
 940   if (CondReg == 0)
 941     return false;
 942
 943   // We've been divorced from our compare!  Our block was split, and
 944   // now our compare lives in a predecessor block.  We musn't
 945   // re-compare here, as the children of the compare aren't guaranteed
 946   // live across the block boundary (we *could* check for this).
 947   // Regardless, the compare has been done in the predecessor block,
 948   // and it left a value for us in a virtual register.  Ergo, we test
 949   // the one-bit value left in the virtual register.
 950   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
 951           AArch64::WZR)
 952       .addReg(CondReg)
 953       .addImm(0)
 954       .addImm(0);
 955
 956   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 957     std::swap(TBB, FBB);
 958     CC = AArch64CC::EQ;
 959   }
 960
 961   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 962       .addImm(CC)
 963       .addMBB(TBB);
 964
 965   // Obtain the branch weight and add the TrueBB to the successor list.
 966   uint32_t BranchWeight = 0;
 967   if (FuncInfo.BPI)
 968     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 969                                                TBB->getBasicBlock());
 970   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 971
 972   FastEmitBranch(FBB, DbgLoc);
 973   return true;
 974 }
 975
 976 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
 977   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
 978   unsigned AddrReg = getRegForValue(BI->getOperand(0));
 979   if (AddrReg == 0)
 980     return false;
 981
 982   // Emit the indirect branch.
 983   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
 984       .addReg(AddrReg);
 985
 986   // Make sure the CFG is up-to-date.
 987   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
 988     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
 989
 990   return true;
 991 }
 992
 993 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
 994   Type *Ty = Src1Value->getType();
 995   EVT SrcEVT = TLI.getValueType(Ty, true);
 996   if (!SrcEVT.isSimple())
 997     return false;
 998   MVT SrcVT = SrcEVT.getSimpleVT();
 999
1000   // Check to see if the 2nd operand is a constant that we can encode directly
1001   // in the compare.
1002   uint64_t Imm;
1003   bool UseImm = false;
1004   bool isNegativeImm = false;
1005   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1006     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1007         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1008       const APInt &CIVal = ConstInt->getValue();
1009
1010       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1011       if (CIVal.isNegative()) {
1012         isNegativeImm = true;
1013         Imm = -Imm;
1014       }
1015       // FIXME: We can handle more immediates using shifts.
1016       UseImm = ((Imm & 0xfff) == Imm);
1017     }
1018   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1019     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1020       if (ConstFP->isZero() && !ConstFP->isNegative())
1021         UseImm = true;
1022   }
1023
1024   unsigned ZReg;
1025   unsigned CmpOpc;
1026   bool isICmp = true;
1027   bool needsExt = false;
1028   switch (SrcVT.SimpleTy) {
1029   default:
1030     return false;
1031   case MVT::i1:
1032   case MVT::i8:
1033   case MVT::i16:
1034     needsExt = true;
1035   // Intentional fall-through.
1036   case MVT::i32:
1037     ZReg = AArch64::WZR;
1038     if (UseImm)
1039       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1040     else
1041       CmpOpc = AArch64::SUBSWrr;
1042     break;
1043   case MVT::i64:
1044     ZReg = AArch64::XZR;
1045     if (UseImm)
1046       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1047     else
1048       CmpOpc = AArch64::SUBSXrr;
1049     break;
1050   case MVT::f32:
1051     isICmp = false;
1052     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1053     break;
1054   case MVT::f64:
1055     isICmp = false;
1056     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1057     break;
1058   }
1059
1060   unsigned SrcReg1 = getRegForValue(Src1Value);
1061   if (SrcReg1 == 0)
1062     return false;
1063
1064   unsigned SrcReg2;
1065   if (!UseImm) {
1066     SrcReg2 = getRegForValue(Src2Value);
1067     if (SrcReg2 == 0)
1068       return false;
1069   }
1070
1071   // We have i1, i8, or i16, we need to either zero extend or sign extend.
1072   if (needsExt) {
1073     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1074     if (SrcReg1 == 0)
1075       return false;
1076     if (!UseImm) {
1077       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1078       if (SrcReg2 == 0)
1079         return false;
1080     }
1081   }
1082
1083   if (isICmp) {
1084     if (UseImm)
1085       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1086           .addReg(ZReg)
1087           .addReg(SrcReg1)
1088           .addImm(Imm)
1089           .addImm(0);
1090     else
1091       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1092           .addReg(ZReg)
1093           .addReg(SrcReg1)
1094           .addReg(SrcReg2);
1095   } else {
1096     if (UseImm)
1097       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1098           .addReg(SrcReg1);
1099     else
1100       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1101           .addReg(SrcReg1)
1102           .addReg(SrcReg2);
1103   }
1104   return true;
1105 }
1106
1107 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1108   const CmpInst *CI = cast<CmpInst>(I);
1109
1110   // We may not handle every CC for now.
1111   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1112   if (CC == AArch64CC::AL)
1113     return false;
1114
1115   // Emit the cmp.
1116   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1117     return false;
1118
1119   // Now set a register based on the comparison.
1120   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1121   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1122   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1123           ResultReg)
1124       .addReg(AArch64::WZR)
1125       .addReg(AArch64::WZR)
1126       .addImm(invertedCC);
1127
1128   UpdateValueMap(I, ResultReg);
1129   return true;
1130 }
1131
1132 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1133   const SelectInst *SI = cast<SelectInst>(I);
1134
1135   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1136   if (!DestEVT.isSimple())
1137     return false;
1138
1139   MVT DestVT = DestEVT.getSimpleVT();
1140   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1141       DestVT != MVT::f64)
1142     return false;
1143
1144   unsigned SelectOpc;
1145   switch (DestVT.SimpleTy) {
1146   default: return false;
1147   case MVT::i32: SelectOpc = AArch64::CSELWr;    break;
1148   case MVT::i64: SelectOpc = AArch64::CSELXr;    break;
1149   case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1150   case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1151   }
1152
1153   const Value *Cond = SI->getCondition();
1154   bool NeedTest = true;
1155   AArch64CC::CondCode CC = AArch64CC::NE;
1156   if (foldXALUIntrinsic(CC, I, Cond))
1157     NeedTest = false;
1158
1159   unsigned CondReg = getRegForValue(Cond);
1160   if (!CondReg)
1161     return false;
1162   bool CondIsKill = hasTrivialKill(Cond);
1163
1164   if (NeedTest) {
1165     MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1166     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1167     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1168             ANDReg)
1169       .addReg(CondReg, getKillRegState(CondIsKill))
1170       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1171
1172     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1173       .addReg(ANDReg)
1174       .addReg(ANDReg)
1175       .addImm(0)
1176       .addImm(0);
1177   }
1178
1179   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1180   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1181
1182   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1183   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1184
1185   if (!TrueReg || !FalseReg)
1186     return false;
1187
1188   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1189   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1190           ResultReg)
1191     .addReg(TrueReg, getKillRegState(TrueIsKill))
1192     .addReg(FalseReg, getKillRegState(FalseIsKill))
1193     .addImm(CC);
1194
1195   UpdateValueMap(I, ResultReg);
1196   return true;
1197 }
1198
1199 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1200   Value *V = I->getOperand(0);
1201   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1202     return false;
1203
1204   unsigned Op = getRegForValue(V);
1205   if (Op == 0)
1206     return false;
1207
1208   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1209   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1210           ResultReg).addReg(Op);
1211   UpdateValueMap(I, ResultReg);
1212   return true;
1213 }
1214
1215 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1216   Value *V = I->getOperand(0);
1217   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1218     return false;
1219
1220   unsigned Op = getRegForValue(V);
1221   if (Op == 0)
1222     return false;
1223
1224   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1225   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1226           ResultReg).addReg(Op);
1227   UpdateValueMap(I, ResultReg);
1228   return true;
1229 }
1230
1231 // FPToUI and FPToSI
1232 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1233   MVT DestVT;
1234   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1235     return false;
1236
1237   unsigned SrcReg = getRegForValue(I->getOperand(0));
1238   if (SrcReg == 0)
1239     return false;
1240
1241   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1242   if (SrcVT == MVT::f128)
1243     return false;
1244
1245   unsigned Opc;
1246   if (SrcVT == MVT::f64) {
1247     if (Signed)
1248       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1249     else
1250       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1251   } else {
1252     if (Signed)
1253       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1254     else
1255       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1256   }
1257   unsigned ResultReg = createResultReg(
1258       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1259   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1260       .addReg(SrcReg);
1261   UpdateValueMap(I, ResultReg);
1262   return true;
1263 }
1264
1265 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1266   MVT DestVT;
1267   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1268     return false;
1269   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1270           "Unexpected value type.");
1271
1272   unsigned SrcReg = getRegForValue(I->getOperand(0));
1273   if (SrcReg == 0)
1274     return false;
1275
1276   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1277
1278   // Handle sign-extension.
1279   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1280     SrcReg =
1281         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1282     if (SrcReg == 0)
1283       return false;
1284   }
1285
1286   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1287                                                   : &AArch64::GPR32RegClass);
1288
1289   unsigned Opc;
1290   if (SrcVT == MVT::i64) {
1291     if (Signed)
1292       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1293     else
1294       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1295   } else {
1296     if (Signed)
1297       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1298     else
1299       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1300   }
1301
1302   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1303   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1304       .addReg(SrcReg);
1305   UpdateValueMap(I, ResultReg);
1306   return true;
1307 }
1308
1309 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1310                                       SmallVectorImpl<MVT> &OutVTs,
1311                                       unsigned &NumBytes) {
1312   CallingConv::ID CC = CLI.CallConv;
1313   SmallVector<CCValAssign, 16> ArgLocs;
1314   CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
1315   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1316
1317   // Get a count of how many bytes are to be pushed on the stack.
1318   NumBytes = CCInfo.getNextStackOffset();
1319
1320   // Issue CALLSEQ_START
1321   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1322   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1323     .addImm(NumBytes);
1324
1325   // Process the args.
1326   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1327     CCValAssign &VA = ArgLocs[i];
1328     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1329     MVT ArgVT = OutVTs[VA.getValNo()];
1330
1331     unsigned ArgReg = getRegForValue(ArgVal);
1332     if (!ArgReg)
1333       return false;
1334
1335     // Handle arg promotion: SExt, ZExt, AExt.
1336     switch (VA.getLocInfo()) {
1337     case CCValAssign::Full:
1338       break;
1339     case CCValAssign::SExt: {
1340       MVT DestVT = VA.getLocVT();
1341       MVT SrcVT = ArgVT;
1342       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1343       if (!ArgReg)
1344         return false;
1345       break;
1346     }
1347     case CCValAssign::AExt:
1348     // Intentional fall-through.
1349     case CCValAssign::ZExt: {
1350       MVT DestVT = VA.getLocVT();
1351       MVT SrcVT = ArgVT;
1352       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1353       if (!ArgReg)
1354         return false;
1355       break;
1356     }
1357     default:
1358       llvm_unreachable("Unknown arg promotion!");
1359     }
1360
1361     // Now copy/store arg to correct locations.
1362     if (VA.isRegLoc() && !VA.needsCustom()) {
1363       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1364               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1365       CLI.OutRegs.push_back(VA.getLocReg());
1366     } else if (VA.needsCustom()) {
1367       // FIXME: Handle custom args.
1368       return false;
1369     } else {
1370       assert(VA.isMemLoc() && "Assuming store on stack.");
1371
1372       // Don't emit stores for undef values.
1373       if (isa<UndefValue>(ArgVal))
1374         continue;
1375
1376       // Need to store on the stack.
1377       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1378
1379       unsigned BEAlign = 0;
1380       if (ArgSize < 8 && !Subtarget->isLittleEndian())
1381         BEAlign = 8 - ArgSize;
1382
1383       Address Addr;
1384       Addr.setKind(Address::RegBase);
1385       Addr.setReg(AArch64::SP);
1386       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1387
1388       if (!EmitStore(ArgVT, ArgReg, Addr))
1389         return false;
1390     }
1391   }
1392   return true;
1393 }
1394
1395 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1396                                  unsigned NumBytes) {
1397   CallingConv::ID CC = CLI.CallConv;
1398
1399   // Issue CALLSEQ_END
1400   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1401   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1402     .addImm(NumBytes).addImm(0);
1403
1404   // Now the return value.
1405   if (RetVT != MVT::isVoid) {
1406     SmallVector<CCValAssign, 16> RVLocs;
1407     CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
1408     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1409
1410     // Only handle a single return value.
1411     if (RVLocs.size() != 1)
1412       return false;
1413
1414     // Copy all of the result registers out of their specified physreg.
1415     MVT CopyVT = RVLocs[0].getValVT();
1416     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1417     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1418             TII.get(TargetOpcode::COPY), ResultReg)
1419       .addReg(RVLocs[0].getLocReg());
1420     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1421
1422     CLI.ResultReg = ResultReg;
1423     CLI.NumResultRegs = 1;
1424   }
1425
1426   return true;
1427 }
1428
1429 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1430   CallingConv::ID CC  = CLI.CallConv;
1431   bool IsVarArg       = CLI.IsVarArg;
1432   const Value *Callee = CLI.Callee;
1433   const char *SymName = CLI.SymName;
1434
1435   CodeModel::Model CM = TM.getCodeModel();
1436   // Only support the small and large code model.
1437   if (CM != CodeModel::Small && CM != CodeModel::Large)
1438     return false;
1439
1440   // FIXME: Add large code model support for ELF.
1441   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
1442     return false;
1443
1444   // Let SDISel handle vararg functions.
1445   if (IsVarArg)
1446     return false;
1447
1448   // FIXME: Only handle *simple* calls for now.
1449   MVT RetVT;
1450   if (CLI.RetTy->isVoidTy())
1451     RetVT = MVT::isVoid;
1452   else if (!isTypeLegal(CLI.RetTy, RetVT))
1453     return false;
1454
1455   for (auto Flag : CLI.OutFlags)
1456     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
1457       return false;
1458
1459   // Set up the argument vectors.
1460   SmallVector<MVT, 16> OutVTs;
1461   OutVTs.reserve(CLI.OutVals.size());
1462
1463   for (auto *Val : CLI.OutVals) {
1464     MVT VT;
1465     if (!isTypeLegal(Val->getType(), VT) &&
1466         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
1467       return false;
1468
1469     // We don't handle vector parameters yet.
1470     if (VT.isVector() || VT.getSizeInBits() > 64)
1471       return false;
1472
1473     OutVTs.push_back(VT);
1474   }
1475
1476   Address Addr;
1477   if (!ComputeCallAddress(Callee, Addr))
1478     return false;
1479
1480   // Handle the arguments now that we've gotten them.
1481   unsigned NumBytes;
1482   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
1483     return false;
1484
1485   // Issue the call.
1486   MachineInstrBuilder MIB;
1487   if (CM == CodeModel::Small) {
1488     unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
1489     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
1490     if (SymName)
1491       MIB.addExternalSymbol(SymName, 0);
1492     else if (Addr.getGlobalValue())
1493       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
1494     else if (Addr.getReg())
1495       MIB.addReg(Addr.getReg());
1496     else
1497       return false;
1498   } else {
1499     unsigned CallReg = 0;
1500     if (SymName) {
1501       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
1502       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
1503               ADRPReg)
1504         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
1505
1506       CallReg = createResultReg(&AArch64::GPR64RegClass);
1507       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
1508               CallReg)
1509         .addReg(ADRPReg)
1510         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
1511                            AArch64II::MO_NC);
1512     } else if (Addr.getGlobalValue()) {
1513       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
1514     } else if (Addr.getReg())
1515       CallReg = Addr.getReg();
1516
1517     if (!CallReg)
1518       return false;
1519
1520     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1521                   TII.get(AArch64::BLR)).addReg(CallReg);
1522   }
1523
1524   // Add implicit physical register uses to the call.
1525   for (auto Reg : CLI.OutRegs)
1526     MIB.addReg(Reg, RegState::Implicit);
1527
1528   // Add a register mask with the call-preserved registers.
1529   // Proper defs for return values will be added by setPhysRegsDeadExcept().
1530   MIB.addRegMask(TRI.getCallPreservedMask(CC));
1531
1532   CLI.Call = MIB;
1533
1534   // Finish off the call including any return values.
1535   return FinishCall(CLI, RetVT, NumBytes);
1536 }
1537
1538 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1539   if (Alignment)
1540     return Len / Alignment <= 4;
1541   else
1542     return Len < 32;
1543 }
1544
1545 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1546                                          uint64_t Len, unsigned Alignment) {
1547   // Make sure we don't bloat code by inlining very large memcpy's.
1548   if (!IsMemCpySmall(Len, Alignment))
1549     return false;
1550
1551   int64_t UnscaledOffset = 0;
1552   Address OrigDest = Dest;
1553   Address OrigSrc = Src;
1554
1555   while (Len) {
1556     MVT VT;
1557     if (!Alignment || Alignment >= 8) {
1558       if (Len >= 8)
1559         VT = MVT::i64;
1560       else if (Len >= 4)
1561         VT = MVT::i32;
1562       else if (Len >= 2)
1563         VT = MVT::i16;
1564       else {
1565         VT = MVT::i8;
1566       }
1567     } else {
1568       // Bound based on alignment.
1569       if (Len >= 4 && Alignment == 4)
1570         VT = MVT::i32;
1571       else if (Len >= 2 && Alignment == 2)
1572         VT = MVT::i16;
1573       else {
1574         VT = MVT::i8;
1575       }
1576     }
1577
1578     bool RV;
1579     unsigned ResultReg;
1580     RV = EmitLoad(VT, ResultReg, Src);
1581     if (!RV)
1582       return false;
1583
1584     RV = EmitStore(VT, ResultReg, Dest);
1585     if (!RV)
1586       return false;
1587
1588     int64_t Size = VT.getSizeInBits() / 8;
1589     Len -= Size;
1590     UnscaledOffset += Size;
1591
1592     // We need to recompute the unscaled offset for each iteration.
1593     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1594     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1595   }
1596
1597   return true;
1598 }
1599
1600 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1601 /// into the user. The condition code will only be updated on success.
1602 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1603                                         const Instruction *I,
1604                                         const Value *Cond) {
1605   if (!isa<ExtractValueInst>(Cond))
1606     return false;
1607
1608   const auto *EV = cast<ExtractValueInst>(Cond);
1609   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1610     return false;
1611
1612   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1613   MVT RetVT;
1614   const Function *Callee = II->getCalledFunction();
1615   Type *RetTy =
1616   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1617   if (!isTypeLegal(RetTy, RetVT))
1618     return false;
1619
1620   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1621     return false;
1622
1623   AArch64CC::CondCode TmpCC;
1624   switch (II->getIntrinsicID()) {
1625     default: return false;
1626     case Intrinsic::sadd_with_overflow:
1627     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1628     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1629     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1630     case Intrinsic::smul_with_overflow:
1631     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1632   }
1633
1634   // Check if both instructions are in the same basic block.
1635   if (II->getParent() != I->getParent())
1636     return false;
1637
1638   // Make sure nothing is in the way
1639   BasicBlock::const_iterator Start = I;
1640   BasicBlock::const_iterator End = II;
1641   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1642     // We only expect extractvalue instructions between the intrinsic and the
1643     // instruction to be selected.
1644     if (!isa<ExtractValueInst>(Itr))
1645       return false;
1646
1647     // Check that the extractvalue operand comes from the intrinsic.
1648     const auto *EVI = cast<ExtractValueInst>(Itr);
1649     if (EVI->getAggregateOperand() != II)
1650       return false;
1651   }
1652
1653   CC = TmpCC;
1654   return true;
1655 }
1656
1657 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1658   // FIXME: Handle more intrinsics.
1659   switch (II->getIntrinsicID()) {
1660   default: return false;
1661   case Intrinsic::frameaddress: {
1662     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1663     MFI->setFrameAddressIsTaken(true);
1664
1665     const AArch64RegisterInfo *RegInfo =
1666       static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
1667     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1668     unsigned SrcReg = FramePtr;
1669
1670     // Recursively load frame address
1671     // ldr x0, [fp]
1672     // ldr x0, [x0]
1673     // ldr x0, [x0]
1674     // ...
1675     unsigned DestReg;
1676     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1677     while (Depth--) {
1678       DestReg = createResultReg(&AArch64::GPR64RegClass);
1679       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1680               TII.get(AArch64::LDRXui), DestReg)
1681         .addReg(SrcReg).addImm(0);
1682       SrcReg = DestReg;
1683     }
1684
1685     UpdateValueMap(II, SrcReg);
1686     return true;
1687   }
1688   case Intrinsic::memcpy:
1689   case Intrinsic::memmove: {
1690     const auto *MTI = cast<MemTransferInst>(II);
1691     // Don't handle volatile.
1692     if (MTI->isVolatile())
1693       return false;
1694
1695     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
1696     // we would emit dead code because we don't currently handle memmoves.
1697     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1698     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
1699       // Small memcpy's are common enough that we want to do them without a call
1700       // if possible.
1701       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1702       unsigned Alignment = MTI->getAlignment();
1703       if (IsMemCpySmall(Len, Alignment)) {
1704         Address Dest, Src;
1705         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1706             !ComputeAddress(MTI->getRawSource(), Src))
1707           return false;
1708         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1709           return true;
1710       }
1711     }
1712
1713     if (!MTI->getLength()->getType()->isIntegerTy(64))
1714       return false;
1715
1716     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1717       // Fast instruction selection doesn't support the special
1718       // address spaces.
1719       return false;
1720
1721     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1722     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1723   }
1724   case Intrinsic::memset: {
1725     const MemSetInst *MSI = cast<MemSetInst>(II);
1726     // Don't handle volatile.
1727     if (MSI->isVolatile())
1728       return false;
1729
1730     if (!MSI->getLength()->getType()->isIntegerTy(64))
1731       return false;
1732
1733     if (MSI->getDestAddressSpace() > 255)
1734       // Fast instruction selection doesn't support the special
1735       // address spaces.
1736       return false;
1737
1738     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
1739   }
1740   case Intrinsic::trap: {
1741     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
1742         .addImm(1);
1743     return true;
1744   }
1745   case Intrinsic::sqrt: {
1746     Type *RetTy = II->getCalledFunction()->getReturnType();
1747
1748     MVT VT;
1749     if (!isTypeLegal(RetTy, VT))
1750       return false;
1751
1752     unsigned Op0Reg = getRegForValue(II->getOperand(0));
1753     if (!Op0Reg)
1754       return false;
1755     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
1756
1757     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
1758     if (!ResultReg)
1759       return false;
1760
1761     UpdateValueMap(II, ResultReg);
1762     return true;
1763   }
1764   case Intrinsic::sadd_with_overflow:
1765   case Intrinsic::uadd_with_overflow:
1766   case Intrinsic::ssub_with_overflow:
1767   case Intrinsic::usub_with_overflow:
1768   case Intrinsic::smul_with_overflow:
1769   case Intrinsic::umul_with_overflow: {
1770     // This implements the basic lowering of the xalu with overflow intrinsics.
1771     const Function *Callee = II->getCalledFunction();
1772     auto *Ty = cast<StructType>(Callee->getReturnType());
1773     Type *RetTy = Ty->getTypeAtIndex(0U);
1774     Type *CondTy = Ty->getTypeAtIndex(1);
1775
1776     MVT VT;
1777     if (!isTypeLegal(RetTy, VT))
1778       return false;
1779
1780     if (VT != MVT::i32 && VT != MVT::i64)
1781       return false;
1782
1783     const Value *LHS = II->getArgOperand(0);
1784     const Value *RHS = II->getArgOperand(1);
1785     // Canonicalize immediate to the RHS.
1786     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
1787         isCommutativeIntrinsic(II))
1788       std::swap(LHS, RHS);
1789
1790     unsigned LHSReg = getRegForValue(LHS);
1791     if (!LHSReg)
1792       return false;
1793     bool LHSIsKill = hasTrivialKill(LHS);
1794
1795     // Check if the immediate can be encoded in the instruction and if we should
1796     // invert the instruction (adds -> subs) to handle negative immediates.
1797     bool UseImm = false;
1798     bool UseInverse = false;
1799     uint64_t Imm = 0;
1800     if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1801       if (C->isNegative()) {
1802         UseInverse = true;
1803         Imm = -(C->getSExtValue());
1804       } else
1805         Imm = C->getZExtValue();
1806
1807       if (isUInt<12>(Imm))
1808         UseImm = true;
1809
1810       UseInverse = UseImm && UseInverse;
1811     }
1812
1813     static const unsigned OpcTable[2][2][2] = {
1814       { {AArch64::ADDSWrr, AArch64::ADDSXrr},
1815         {AArch64::ADDSWri, AArch64::ADDSXri} },
1816       { {AArch64::SUBSWrr, AArch64::SUBSXrr},
1817         {AArch64::SUBSWri, AArch64::SUBSXri} }
1818     };
1819     unsigned Opc = 0;
1820     unsigned MulReg = 0;
1821     unsigned RHSReg = 0;
1822     bool RHSIsKill = false;
1823     AArch64CC::CondCode CC = AArch64CC::Invalid;
1824     bool Is64Bit = VT == MVT::i64;
1825     switch (II->getIntrinsicID()) {
1826     default: llvm_unreachable("Unexpected intrinsic!");
1827     case Intrinsic::sadd_with_overflow:
1828       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1829     case Intrinsic::uadd_with_overflow:
1830       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
1831     case Intrinsic::ssub_with_overflow:
1832       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1833     case Intrinsic::usub_with_overflow:
1834       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
1835     case Intrinsic::smul_with_overflow: {
1836       CC = AArch64CC::NE;
1837       RHSReg = getRegForValue(RHS);
1838       if (!RHSReg)
1839         return false;
1840       RHSIsKill = hasTrivialKill(RHS);
1841
1842       if (VT == MVT::i32) {
1843         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1844         unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
1845         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1846                                             AArch64::sub_32);
1847         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
1848                                               AArch64::sub_32);
1849         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1850         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1851                 TII.get(AArch64::SUBSWrs), CmpReg)
1852           .addReg(ShiftReg, getKillRegState(true))
1853           .addReg(MulReg, getKillRegState(false))
1854           .addImm(159); // 159 <-> asr #31
1855       } else {
1856         assert(VT == MVT::i64 && "Unexpected value type.");
1857         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1858         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
1859                                         RHSReg, RHSIsKill);
1860         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1861         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1862                 TII.get(AArch64::SUBSXrs), CmpReg)
1863           .addReg(SMULHReg, getKillRegState(true))
1864           .addReg(MulReg, getKillRegState(false))
1865           .addImm(191); // 191 <-> asr #63
1866       }
1867       break;
1868     }
1869     case Intrinsic::umul_with_overflow: {
1870       CC = AArch64CC::NE;
1871       RHSReg = getRegForValue(RHS);
1872       if (!RHSReg)
1873         return false;
1874       RHSIsKill = hasTrivialKill(RHS);
1875
1876       if (VT == MVT::i32) {
1877         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1878         unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
1879         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880                 TII.get(AArch64::SUBSXrs), CmpReg)
1881           .addReg(AArch64::XZR, getKillRegState(true))
1882           .addReg(MulReg, getKillRegState(false))
1883           .addImm(96); // 96 <-> lsr #32
1884         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1885                                             AArch64::sub_32);
1886       } else {
1887         assert(VT == MVT::i64 && "Unexpected value type.");
1888         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1889         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
1890                                         RHSReg, RHSIsKill);
1891         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1892         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1893                 TII.get(AArch64::SUBSXrr), CmpReg)
1894         .addReg(AArch64::XZR, getKillRegState(true))
1895         .addReg(UMULHReg, getKillRegState(false));
1896       }
1897       break;
1898     }
1899     }
1900
1901     if (!UseImm) {
1902       RHSReg = getRegForValue(RHS);
1903       if (!RHSReg)
1904         return false;
1905       RHSIsKill = hasTrivialKill(RHS);
1906     }
1907
1908     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1909     if (Opc) {
1910       MachineInstrBuilder MIB;
1911       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
1912                     ResultReg)
1913               .addReg(LHSReg, getKillRegState(LHSIsKill));
1914       if (UseImm) {
1915         MIB.addImm(Imm);
1916         MIB.addImm(0);
1917       } else
1918         MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
1919     }
1920     else
1921       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1922               TII.get(TargetOpcode::COPY), ResultReg)
1923         .addReg(MulReg);
1924
1925     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
1926     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
1927     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1928             ResultReg2)
1929       .addReg(AArch64::WZR, getKillRegState(true))
1930       .addReg(AArch64::WZR, getKillRegState(true))
1931       .addImm(getInvertedCondCode(CC));
1932
1933     UpdateValueMap(II, ResultReg, 2);
1934     return true;
1935   }
1936   }
1937   return false;
1938 }
1939
1940 bool AArch64FastISel::SelectRet(const Instruction *I) {
1941   const ReturnInst *Ret = cast<ReturnInst>(I);
1942   const Function &F = *I->getParent()->getParent();
1943
1944   if (!FuncInfo.CanLowerReturn)
1945     return false;
1946
1947   if (F.isVarArg())
1948     return false;
1949
1950   // Build a list of return value registers.
1951   SmallVector<unsigned, 4> RetRegs;
1952
1953   if (Ret->getNumOperands() > 0) {
1954     CallingConv::ID CC = F.getCallingConv();
1955     SmallVector<ISD::OutputArg, 4> Outs;
1956     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
1957
1958     // Analyze operands of the call, assigning locations to each operand.
1959     SmallVector<CCValAssign, 16> ValLocs;
1960     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
1961                    I->getContext());
1962     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
1963                                                      : RetCC_AArch64_AAPCS;
1964     CCInfo.AnalyzeReturn(Outs, RetCC);
1965
1966     // Only handle a single return value for now.
1967     if (ValLocs.size() != 1)
1968       return false;
1969
1970     CCValAssign &VA = ValLocs[0];
1971     const Value *RV = Ret->getOperand(0);
1972
1973     // Don't bother handling odd stuff for now.
1974     if (VA.getLocInfo() != CCValAssign::Full)
1975       return false;
1976     // Only handle register returns for now.
1977     if (!VA.isRegLoc())
1978       return false;
1979     unsigned Reg = getRegForValue(RV);
1980     if (Reg == 0)
1981       return false;
1982
1983     unsigned SrcReg = Reg + VA.getValNo();
1984     unsigned DestReg = VA.getLocReg();
1985     // Avoid a cross-class copy. This is very unlikely.
1986     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
1987       return false;
1988
1989     EVT RVEVT = TLI.getValueType(RV->getType());
1990     if (!RVEVT.isSimple())
1991       return false;
1992
1993     // Vectors (of > 1 lane) in big endian need tricky handling.
1994     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
1995       return false;
1996
1997     MVT RVVT = RVEVT.getSimpleVT();
1998     if (RVVT == MVT::f128)
1999       return false;
2000     MVT DestVT = VA.getValVT();
2001     // Special handling for extended integers.
2002     if (RVVT != DestVT) {
2003       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2004         return false;
2005
2006       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2007         return false;
2008
2009       bool isZExt = Outs[0].Flags.isZExt();
2010       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2011       if (SrcReg == 0)
2012         return false;
2013     }
2014
2015     // Make the copy.
2016     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2017             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2018
2019     // Add register to return instruction.
2020     RetRegs.push_back(VA.getLocReg());
2021   }
2022
2023   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2024                                     TII.get(AArch64::RET_ReallyLR));
2025   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2026     MIB.addReg(RetRegs[i], RegState::Implicit);
2027   return true;
2028 }
2029
2030 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2031   Type *DestTy = I->getType();
2032   Value *Op = I->getOperand(0);
2033   Type *SrcTy = Op->getType();
2034
2035   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2036   EVT DestEVT = TLI.getValueType(DestTy, true);
2037   if (!SrcEVT.isSimple())
2038     return false;
2039   if (!DestEVT.isSimple())
2040     return false;
2041
2042   MVT SrcVT = SrcEVT.getSimpleVT();
2043   MVT DestVT = DestEVT.getSimpleVT();
2044
2045   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2046       SrcVT != MVT::i8)
2047     return false;
2048   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2049       DestVT != MVT::i1)
2050     return false;
2051
2052   unsigned SrcReg = getRegForValue(Op);
2053   if (!SrcReg)
2054     return false;
2055
2056   // If we're truncating from i64 to a smaller non-legal type then generate an
2057   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2058   // generate any code.
2059   if (SrcVT == MVT::i64) {
2060     uint64_t Mask = 0;
2061     switch (DestVT.SimpleTy) {
2062     default:
2063       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2064       return false;
2065     case MVT::i1:
2066       Mask = 0x1;
2067       break;
2068     case MVT::i8:
2069       Mask = 0xff;
2070       break;
2071     case MVT::i16:
2072       Mask = 0xffff;
2073       break;
2074     }
2075     // Issue an extract_subreg to get the lower 32-bits.
2076     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2077                                                 AArch64::sub_32);
2078     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2079     // Create the AND instruction which performs the actual truncation.
2080     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2081     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2082             ANDReg)
2083         .addReg(Reg32)
2084         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2085     SrcReg = ANDReg;
2086   }
2087
2088   UpdateValueMap(I, SrcReg);
2089   return true;
2090 }
2091
2092 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2093   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2094           DestVT == MVT::i64) &&
2095          "Unexpected value type.");
2096   // Handle i8 and i16 as i32.
2097   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2098     DestVT = MVT::i32;
2099
2100   if (isZExt) {
2101     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2102     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2103     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2104             ResultReg)
2105         .addReg(SrcReg)
2106         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2107
2108     if (DestVT == MVT::i64) {
2109       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2110       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2111       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2112       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2113               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2114           .addImm(0)
2115           .addReg(ResultReg)
2116           .addImm(AArch64::sub_32);
2117       ResultReg = Reg64;
2118     }
2119     return ResultReg;
2120   } else {
2121     if (DestVT == MVT::i64) {
2122       // FIXME: We're SExt i1 to i64.
2123       return 0;
2124     }
2125     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2126     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2127             ResultReg)
2128         .addReg(SrcReg)
2129         .addImm(0)
2130         .addImm(0);
2131     return ResultReg;
2132   }
2133 }
2134
2135 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2136                                       unsigned Op1, bool Op1IsKill) {
2137   unsigned Opc, ZReg;
2138   switch (RetVT.SimpleTy) {
2139   default: return 0;
2140   case MVT::i8:
2141   case MVT::i16:
2142   case MVT::i32:
2143     RetVT = MVT::i32;
2144     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2145   case MVT::i64:
2146     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2147   }
2148
2149   // Create the base instruction, then add the operands.
2150   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2151   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2152     .addReg(Op0, getKillRegState(Op0IsKill))
2153     .addReg(Op1, getKillRegState(Op1IsKill))
2154     .addReg(ZReg, getKillRegState(true));
2155
2156   return ResultReg;
2157 }
2158
2159 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2160                                         unsigned Op1, bool Op1IsKill) {
2161   if (RetVT != MVT::i64)
2162     return 0;
2163
2164   // Create the base instruction, then add the operands.
2165   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2166   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2167           ResultReg)
2168     .addReg(Op0, getKillRegState(Op0IsKill))
2169     .addReg(Op1, getKillRegState(Op1IsKill))
2170     .addReg(AArch64::XZR, getKillRegState(true));
2171
2172   return ResultReg;
2173 }
2174
2175 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2176                                         unsigned Op1, bool Op1IsKill) {
2177   if (RetVT != MVT::i64)
2178     return 0;
2179
2180   // Create the base instruction, then add the operands.
2181   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2182   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2183           ResultReg)
2184     .addReg(Op0, getKillRegState(Op0IsKill))
2185     .addReg(Op1, getKillRegState(Op1IsKill))
2186     .addReg(AArch64::XZR, getKillRegState(true));
2187
2188   return ResultReg;
2189 }
2190
2191 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2192                                       uint64_t Shift) {
2193   unsigned Opc, ImmR, ImmS;
2194   switch (RetVT.SimpleTy) {
2195   default: return 0;
2196   case MVT::i8:
2197   case MVT::i16:
2198   case MVT::i32:
2199     RetVT = MVT::i32;
2200     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2201   case MVT::i64:
2202     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2203   }
2204
2205   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2206                           ImmS);
2207 }
2208
2209 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2210                                       uint64_t Shift) {
2211   unsigned Opc, ImmS;
2212   switch (RetVT.SimpleTy) {
2213   default: return 0;
2214   case MVT::i8:
2215   case MVT::i16:
2216   case MVT::i32:
2217     RetVT = MVT::i32;
2218     Opc = AArch64::UBFMWri; ImmS = 31; break;
2219   case MVT::i64:
2220     Opc = AArch64::UBFMXri; ImmS = 63; break;
2221   }
2222
2223   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2224                           ImmS);
2225 }
2226
2227 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2228                                       uint64_t Shift) {
2229   unsigned Opc, ImmS;
2230   switch (RetVT.SimpleTy) {
2231   default: return 0;
2232   case MVT::i8:
2233   case MVT::i16:
2234   case MVT::i32:
2235     RetVT = MVT::i32;
2236     Opc = AArch64::SBFMWri; ImmS = 31; break;
2237   case MVT::i64:
2238     Opc = AArch64::SBFMXri; ImmS = 63; break;
2239   }
2240
2241   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2242                           ImmS);
2243 }
2244
2245 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2246                                      bool isZExt) {
2247   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2248
2249   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2250   // DestVT are odd things, so test to make sure that they are both types we can
2251   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2252   // bail out to SelectionDAG.
2253   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2254        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2255       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2256        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2257     return 0;
2258
2259   unsigned Opc;
2260   unsigned Imm = 0;
2261
2262   switch (SrcVT.SimpleTy) {
2263   default:
2264     return 0;
2265   case MVT::i1:
2266     return Emiti1Ext(SrcReg, DestVT, isZExt);
2267   case MVT::i8:
2268     if (DestVT == MVT::i64)
2269       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2270     else
2271       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2272     Imm = 7;
2273     break;
2274   case MVT::i16:
2275     if (DestVT == MVT::i64)
2276       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2277     else
2278       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2279     Imm = 15;
2280     break;
2281   case MVT::i32:
2282     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2283     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2284     Imm = 31;
2285     break;
2286   }
2287
2288   // Handle i8 and i16 as i32.
2289   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2290     DestVT = MVT::i32;
2291   else if (DestVT == MVT::i64) {
2292     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2293     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2294             TII.get(AArch64::SUBREG_TO_REG), Src64)
2295         .addImm(0)
2296         .addReg(SrcReg)
2297         .addImm(AArch64::sub_32);
2298     SrcReg = Src64;
2299   }
2300
2301   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2302   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2303       .addReg(SrcReg)
2304       .addImm(0)
2305       .addImm(Imm);
2306
2307   return ResultReg;
2308 }
2309
2310 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2311   // On ARM, in general, integer casts don't involve legal types; this code
2312   // handles promotable integers.  The high bits for a type smaller than
2313   // the register size are assumed to be undefined.
2314   Type *DestTy = I->getType();
2315   Value *Src = I->getOperand(0);
2316   Type *SrcTy = Src->getType();
2317
2318   bool isZExt = isa<ZExtInst>(I);
2319   unsigned SrcReg = getRegForValue(Src);
2320   if (!SrcReg)
2321     return false;
2322
2323   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2324   EVT DestEVT = TLI.getValueType(DestTy, true);
2325   if (!SrcEVT.isSimple())
2326     return false;
2327   if (!DestEVT.isSimple())
2328     return false;
2329
2330   MVT SrcVT = SrcEVT.getSimpleVT();
2331   MVT DestVT = DestEVT.getSimpleVT();
2332   unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2333   if (ResultReg == 0)
2334     return false;
2335   UpdateValueMap(I, ResultReg);
2336   return true;
2337 }
2338
2339 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2340   EVT DestEVT = TLI.getValueType(I->getType(), true);
2341   if (!DestEVT.isSimple())
2342     return false;
2343
2344   MVT DestVT = DestEVT.getSimpleVT();
2345   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2346     return false;
2347
2348   unsigned DivOpc;
2349   bool is64bit = (DestVT == MVT::i64);
2350   switch (ISDOpcode) {
2351   default:
2352     return false;
2353   case ISD::SREM:
2354     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2355     break;
2356   case ISD::UREM:
2357     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2358     break;
2359   }
2360   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2361   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2362   if (!Src0Reg)
2363     return false;
2364
2365   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2366   if (!Src1Reg)
2367     return false;
2368
2369   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2370   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2371       .addReg(Src0Reg)
2372       .addReg(Src1Reg);
2373   // The remainder is computed as numerator - (quotient * denominator) using the
2374   // MSUB instruction.
2375   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2376   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2377       .addReg(QuotReg)
2378       .addReg(Src1Reg)
2379       .addReg(Src0Reg);
2380   UpdateValueMap(I, ResultReg);
2381   return true;
2382 }
2383
2384 bool AArch64FastISel::SelectMul(const Instruction *I) {
2385   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2386   if (!SrcEVT.isSimple())
2387     return false;
2388   MVT SrcVT = SrcEVT.getSimpleVT();
2389
2390   // Must be simple value type.  Don't handle vectors.
2391   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2392       SrcVT != MVT::i8)
2393     return false;
2394
2395   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2396   if (!Src0Reg)
2397     return false;
2398   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2399
2400   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2401   if (!Src1Reg)
2402     return false;
2403   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2404
2405   unsigned ResultReg =
2406     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2407
2408   if (!ResultReg)
2409     return false;
2410
2411   UpdateValueMap(I, ResultReg);
2412   return true;
2413 }
2414
2415 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2416                                   bool IsArithmetic) {
2417   EVT RetEVT = TLI.getValueType(I->getType(), true);
2418   if (!RetEVT.isSimple())
2419     return false;
2420   MVT RetVT = RetEVT.getSimpleVT();
2421
2422   if (!isa<ConstantInt>(I->getOperand(1)))
2423     return false;
2424
2425   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2426   if (!Op0Reg)
2427     return false;
2428   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2429
2430   uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
2431
2432   unsigned ResultReg;
2433   if (IsLeftShift)
2434     ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2435   else {
2436     if (IsArithmetic)
2437       ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2438     else
2439       ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2440   }
2441
2442   if (!ResultReg)
2443     return false;
2444
2445   UpdateValueMap(I, ResultReg);
2446   return true;
2447 }
2448
2449 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2450   MVT RetVT, SrcVT;
2451
2452   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2453     return false;
2454   if (!isTypeLegal(I->getType(), RetVT))
2455     return false;
2456
2457   unsigned Opc;
2458   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2459     Opc = AArch64::FMOVWSr;
2460   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2461     Opc = AArch64::FMOVXDr;
2462   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2463     Opc = AArch64::FMOVSWr;
2464   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2465     Opc = AArch64::FMOVDXr;
2466   else
2467     return false;
2468
2469   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2470   if (!Op0Reg)
2471     return false;
2472   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2473   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2474                                       Op0Reg, Op0IsKill);
2475
2476   if (!ResultReg)
2477     return false;
2478
2479   UpdateValueMap(I, ResultReg);
2480   return true;
2481 }
2482
2483 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2484   switch (I->getOpcode()) {
2485   default:
2486     break;
2487   case Instruction::Load:
2488     return SelectLoad(I);
2489   case Instruction::Store:
2490     return SelectStore(I);
2491   case Instruction::Br:
2492     return SelectBranch(I);
2493   case Instruction::IndirectBr:
2494     return SelectIndirectBr(I);
2495   case Instruction::FCmp:
2496   case Instruction::ICmp:
2497     return SelectCmp(I);
2498   case Instruction::Select:
2499     return SelectSelect(I);
2500   case Instruction::FPExt:
2501     return SelectFPExt(I);
2502   case Instruction::FPTrunc:
2503     return SelectFPTrunc(I);
2504   case Instruction::FPToSI:
2505     return SelectFPToInt(I, /*Signed=*/true);
2506   case Instruction::FPToUI:
2507     return SelectFPToInt(I, /*Signed=*/false);
2508   case Instruction::SIToFP:
2509     return SelectIntToFP(I, /*Signed=*/true);
2510   case Instruction::UIToFP:
2511     return SelectIntToFP(I, /*Signed=*/false);
2512   case Instruction::SRem:
2513     return SelectRem(I, ISD::SREM);
2514   case Instruction::URem:
2515     return SelectRem(I, ISD::UREM);
2516   case Instruction::Ret:
2517     return SelectRet(I);
2518   case Instruction::Trunc:
2519     return SelectTrunc(I);
2520   case Instruction::ZExt:
2521   case Instruction::SExt:
2522     return SelectIntExt(I);
2523
2524   // FIXME: All of these should really be handled by the target-independent
2525   // selector -> improve FastISel tblgen.
2526   case Instruction::Mul:
2527     return SelectMul(I);
2528   case Instruction::Shl:
2529       return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2530   case Instruction::LShr:
2531     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2532   case Instruction::AShr:
2533     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2534   case Instruction::BitCast:
2535     return SelectBitCast(I);
2536   }
2537   return false;
2538   // Silence warnings.
2539   (void)&CC_AArch64_DarwinPCS_VarArg;
2540 }
2541
2542 namespace llvm {
2543 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2544                                         const TargetLibraryInfo *libInfo) {
2545   return new AArch64FastISel(funcInfo, libInfo);
2546 }
2547 }