lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44
  45   class Address {
  46   public:
  47     typedef enum {
  48       RegBase,
  49       FrameIndexBase
  50     } BaseKind;
  51
  52   private:
  53     BaseKind Kind;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     int64_t Offset;
  59     const GlobalValue *GV;
  60
  61   public:
  62     Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
  63     void setKind(BaseKind K) { Kind = K; }
  64     BaseKind getKind() const { return Kind; }
  65     bool isRegBase() const { return Kind == RegBase; }
  66     bool isFIBase() const { return Kind == FrameIndexBase; }
  67     void setReg(unsigned Reg) {
  68       assert(isRegBase() && "Invalid base register access!");
  69       Base.Reg = Reg;
  70     }
  71     unsigned getReg() const {
  72       assert(isRegBase() && "Invalid base register access!");
  73       return Base.Reg;
  74     }
  75     void setFI(unsigned FI) {
  76       assert(isFIBase() && "Invalid base frame index  access!");
  77       Base.FI = FI;
  78     }
  79     unsigned getFI() const {
  80       assert(isFIBase() && "Invalid base frame index access!");
  81       return Base.FI;
  82     }
  83     void setOffset(int64_t O) { Offset = O; }
  84     int64_t getOffset() { return Offset; }
  85
  86     void setGlobalValue(const GlobalValue *G) { GV = G; }
  87     const GlobalValue *getGlobalValue() { return GV; }
  88
  89     bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
  90   };
  91
  92   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  93   /// make the right decision when generating code for different targets.
  94   const AArch64Subtarget *Subtarget;
  95   LLVMContext *Context;
  96
  97   bool FastLowerArguments() override;
  98   bool FastLowerCall(CallLoweringInfo &CLI) override;
  99   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
 100
 101 private:
 102   // Selection routines.
 103   bool SelectLoad(const Instruction *I);
 104   bool SelectStore(const Instruction *I);
 105   bool SelectBranch(const Instruction *I);
 106   bool SelectIndirectBr(const Instruction *I);
 107   bool SelectCmp(const Instruction *I);
 108   bool SelectSelect(const Instruction *I);
 109   bool SelectFPExt(const Instruction *I);
 110   bool SelectFPTrunc(const Instruction *I);
 111   bool SelectFPToInt(const Instruction *I, bool Signed);
 112   bool SelectIntToFP(const Instruction *I, bool Signed);
 113   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 114   bool SelectRet(const Instruction *I);
 115   bool SelectTrunc(const Instruction *I);
 116   bool SelectIntExt(const Instruction *I);
 117   bool SelectMul(const Instruction *I);
 118   bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
 119   bool SelectBitCast(const Instruction *I);
 120
 121   // Utility helper routines.
 122   bool isTypeLegal(Type *Ty, MVT &VT);
 123   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 124   bool ComputeAddress(const Value *Obj, Address &Addr);
 125   bool ComputeCallAddress(const Value *V, Address &Addr);
 126   bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
 127                        bool UseUnscaled);
 128   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 129                             unsigned Flags, bool UseUnscaled);
 130   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 131   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 132                           unsigned Alignment);
 133   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 134                          const Value *Cond);
 135
 136   // Emit functions.
 137   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
 138   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 139                 bool UseUnscaled = false);
 140   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 141                  bool UseUnscaled = false);
 142   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 143   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 144   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 145                        unsigned Op1, bool Op1IsKill);
 146   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 147                          unsigned Op1, bool Op1IsKill);
 148   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 149                          unsigned Op1, bool Op1IsKill);
 150   unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 151   unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 152   unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
 153
 154   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 155   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 156
 157   // Call handling routines.
 158 private:
 159   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 160   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 161                        unsigned &NumBytes);
 162   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 163
 164 public:
 165   // Backend specific FastISel code.
 166   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 167   unsigned TargetMaterializeConstant(const Constant *C) override;
 168
 169   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 170                          const TargetLibraryInfo *libInfo)
 171       : FastISel(funcInfo, libInfo) {
 172     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 173     Context = &funcInfo.Fn->getContext();
 174   }
 175
 176   bool TargetSelectInstruction(const Instruction *I) override;
 177
 178 #include "AArch64GenFastISel.inc"
 179 };
 180
 181 } // end anonymous namespace
 182
 183 #include "AArch64GenCallingConv.inc"
 184
 185 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 186   if (CC == CallingConv::WebKit_JS)
 187     return CC_AArch64_WebKit_JS;
 188   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 189 }
 190
 191 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 192   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 193          "Alloca should always return a pointer.");
 194
 195   // Don't handle dynamic allocas.
 196   if (!FuncInfo.StaticAllocaMap.count(AI))
 197     return 0;
 198
 199   DenseMap<const AllocaInst *, int>::iterator SI =
 200       FuncInfo.StaticAllocaMap.find(AI);
 201
 202   if (SI != FuncInfo.StaticAllocaMap.end()) {
 203     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 204     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 205             ResultReg)
 206         .addFrameIndex(SI->second)
 207         .addImm(0)
 208         .addImm(0);
 209     return ResultReg;
 210   }
 211
 212   return 0;
 213 }
 214
 215 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 216   if (VT != MVT::f32 && VT != MVT::f64)
 217     return 0;
 218
 219   const APFloat Val = CFP->getValueAPF();
 220   bool is64bit = (VT == MVT::f64);
 221
 222   // This checks to see if we can use FMOV instructions to materialize
 223   // a constant, otherwise we have to materialize via the constant pool.
 224   if (TLI.isFPImmLegal(Val, VT)) {
 225     int Imm;
 226     unsigned Opc;
 227     if (is64bit) {
 228       Imm = AArch64_AM::getFP64Imm(Val);
 229       Opc = AArch64::FMOVDi;
 230     } else {
 231       Imm = AArch64_AM::getFP32Imm(Val);
 232       Opc = AArch64::FMOVSi;
 233     }
 234     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 235     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 236         .addImm(Imm);
 237     return ResultReg;
 238   }
 239
 240   // Materialize via constant pool.  MachineConstantPool wants an explicit
 241   // alignment.
 242   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 243   if (Align == 0)
 244     Align = DL.getTypeAllocSize(CFP->getType());
 245
 246   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 247   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 248   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 249           ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
 250
 251   unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
 252   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 253   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 254       .addReg(ADRPReg)
 255       .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 256   return ResultReg;
 257 }
 258
 259 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 260   // We can't handle thread-local variables quickly yet.
 261   if (GV->isThreadLocal())
 262     return 0;
 263
 264   // MachO still uses GOT for large code-model accesses, but ELF requires
 265   // movz/movk sequences, which FastISel doesn't handle yet.
 266   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 267     return 0;
 268
 269   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 270
 271   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 272   if (!DestEVT.isSimple())
 273     return 0;
 274
 275   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 276   unsigned ResultReg;
 277
 278   if (OpFlags & AArch64II::MO_GOT) {
 279     // ADRP + LDRX
 280     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 281             ADRPReg)
 282         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 283
 284     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 285     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 286             ResultReg)
 287         .addReg(ADRPReg)
 288         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 289                           AArch64II::MO_NC);
 290   } else {
 291     // ADRP + ADDX
 292     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 293             ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 294
 295     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 296     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 297             ResultReg)
 298         .addReg(ADRPReg)
 299         .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 300         .addImm(0);
 301   }
 302   return ResultReg;
 303 }
 304
 305 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 306   EVT CEVT = TLI.getValueType(C->getType(), true);
 307
 308   // Only handle simple types.
 309   if (!CEVT.isSimple())
 310     return 0;
 311   MVT VT = CEVT.getSimpleVT();
 312
 313   // FIXME: Handle ConstantInt.
 314   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 315     return AArch64MaterializeFP(CFP, VT);
 316   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 317     return AArch64MaterializeGV(GV);
 318
 319   return 0;
 320 }
 321
 322 // Computes the address to get to an object.
 323 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
 324   const User *U = nullptr;
 325   unsigned Opcode = Instruction::UserOp1;
 326   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 327     // Don't walk into other basic blocks unless the object is an alloca from
 328     // another block, otherwise it may not have a virtual register assigned.
 329     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 330         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 331       Opcode = I->getOpcode();
 332       U = I;
 333     }
 334   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 335     Opcode = C->getOpcode();
 336     U = C;
 337   }
 338
 339   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 340     if (Ty->getAddressSpace() > 255)
 341       // Fast instruction selection doesn't support the special
 342       // address spaces.
 343       return false;
 344
 345   switch (Opcode) {
 346   default:
 347     break;
 348   case Instruction::BitCast: {
 349     // Look through bitcasts.
 350     return ComputeAddress(U->getOperand(0), Addr);
 351   }
 352   case Instruction::IntToPtr: {
 353     // Look past no-op inttoptrs.
 354     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 355       return ComputeAddress(U->getOperand(0), Addr);
 356     break;
 357   }
 358   case Instruction::PtrToInt: {
 359     // Look past no-op ptrtoints.
 360     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 361       return ComputeAddress(U->getOperand(0), Addr);
 362     break;
 363   }
 364   case Instruction::GetElementPtr: {
 365     Address SavedAddr = Addr;
 366     uint64_t TmpOffset = Addr.getOffset();
 367
 368     // Iterate through the GEP folding the constants into offsets where
 369     // we can.
 370     gep_type_iterator GTI = gep_type_begin(U);
 371     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 372          ++i, ++GTI) {
 373       const Value *Op = *i;
 374       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 375         const StructLayout *SL = DL.getStructLayout(STy);
 376         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 377         TmpOffset += SL->getElementOffset(Idx);
 378       } else {
 379         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 380         for (;;) {
 381           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 382             // Constant-offset addressing.
 383             TmpOffset += CI->getSExtValue() * S;
 384             break;
 385           }
 386           if (canFoldAddIntoGEP(U, Op)) {
 387             // A compatible add with a constant operand. Fold the constant.
 388             ConstantInt *CI =
 389                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 390             TmpOffset += CI->getSExtValue() * S;
 391             // Iterate on the other operand.
 392             Op = cast<AddOperator>(Op)->getOperand(0);
 393             continue;
 394           }
 395           // Unsupported
 396           goto unsupported_gep;
 397         }
 398       }
 399     }
 400
 401     // Try to grab the base operand now.
 402     Addr.setOffset(TmpOffset);
 403     if (ComputeAddress(U->getOperand(0), Addr))
 404       return true;
 405
 406     // We failed, restore everything and try the other options.
 407     Addr = SavedAddr;
 408
 409   unsupported_gep:
 410     break;
 411   }
 412   case Instruction::Alloca: {
 413     const AllocaInst *AI = cast<AllocaInst>(Obj);
 414     DenseMap<const AllocaInst *, int>::iterator SI =
 415         FuncInfo.StaticAllocaMap.find(AI);
 416     if (SI != FuncInfo.StaticAllocaMap.end()) {
 417       Addr.setKind(Address::FrameIndexBase);
 418       Addr.setFI(SI->second);
 419       return true;
 420     }
 421     break;
 422   }
 423   case Instruction::Add:
 424     // Adds of constants are common and easy enough.
 425     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 426       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 427       return ComputeAddress(U->getOperand(0), Addr);
 428     }
 429     break;
 430   }
 431
 432   // Try to get this in a register if nothing else has worked.
 433   if (!Addr.isValid())
 434     Addr.setReg(getRegForValue(Obj));
 435   return Addr.isValid();
 436 }
 437
 438 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 439   const User *U = nullptr;
 440   unsigned Opcode = Instruction::UserOp1;
 441   bool InMBB = true;
 442
 443   if (const auto *I = dyn_cast<Instruction>(V)) {
 444     Opcode = I->getOpcode();
 445     U = I;
 446     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 447   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 448     Opcode = C->getOpcode();
 449     U = C;
 450   }
 451
 452   switch (Opcode) {
 453   default: break;
 454   case Instruction::BitCast:
 455     // Look past bitcasts if its operand is in the same BB.
 456     if (InMBB)
 457       return ComputeCallAddress(U->getOperand(0), Addr);
 458     break;
 459   case Instruction::IntToPtr:
 460     // Look past no-op inttoptrs if its operand is in the same BB.
 461     if (InMBB &&
 462         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 463       return ComputeCallAddress(U->getOperand(0), Addr);
 464     break;
 465   case Instruction::PtrToInt:
 466     // Look past no-op ptrtoints if its operand is in the same BB.
 467     if (InMBB &&
 468         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 469       return ComputeCallAddress(U->getOperand(0), Addr);
 470     break;
 471   }
 472
 473   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 474     Addr.setGlobalValue(GV);
 475     return true;
 476   }
 477
 478   // If all else fails, try to materialize the value in a register.
 479   if (!Addr.getGlobalValue()) {
 480     Addr.setReg(getRegForValue(V));
 481     return Addr.getReg() != 0;
 482   }
 483
 484   return false;
 485 }
 486
 487
 488 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 489   EVT evt = TLI.getValueType(Ty, true);
 490
 491   // Only handle simple types.
 492   if (evt == MVT::Other || !evt.isSimple())
 493     return false;
 494   VT = evt.getSimpleVT();
 495
 496   // This is a legal type, but it's not something we handle in fast-isel.
 497   if (VT == MVT::f128)
 498     return false;
 499
 500   // Handle all other legal types, i.e. a register that will directly hold this
 501   // value.
 502   return TLI.isTypeLegal(VT);
 503 }
 504
 505 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 506   if (isTypeLegal(Ty, VT))
 507     return true;
 508
 509   // If this is a type than can be sign or zero-extended to a basic operation
 510   // go ahead and accept it now. For stores, this reflects truncation.
 511   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 512     return true;
 513
 514   return false;
 515 }
 516
 517 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
 518                                       int64_t ScaleFactor, bool UseUnscaled) {
 519   bool needsLowering = false;
 520   int64_t Offset = Addr.getOffset();
 521   switch (VT.SimpleTy) {
 522   default:
 523     return false;
 524   case MVT::i1:
 525   case MVT::i8:
 526   case MVT::i16:
 527   case MVT::i32:
 528   case MVT::i64:
 529   case MVT::f32:
 530   case MVT::f64:
 531     if (!UseUnscaled)
 532       // Using scaled, 12-bit, unsigned immediate offsets.
 533       needsLowering = ((Offset & 0xfff) != Offset);
 534     else
 535       // Using unscaled, 9-bit, signed immediate offsets.
 536       needsLowering = (Offset > 256 || Offset < -256);
 537     break;
 538   }
 539
 540   //If this is a stack pointer and the offset needs to be simplified then put
 541   // the alloca address into a register, set the base type back to register and
 542   // continue. This should almost never happen.
 543   if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
 544     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
 545     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 546             ResultReg)
 547         .addFrameIndex(Addr.getFI())
 548         .addImm(0)
 549         .addImm(0);
 550     Addr.setKind(Address::RegBase);
 551     Addr.setReg(ResultReg);
 552   }
 553
 554   // Since the offset is too large for the load/store instruction get the
 555   // reg+offset into a register.
 556   if (needsLowering) {
 557     uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
 558     unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
 559                                       UnscaledOffset, MVT::i64);
 560     if (ResultReg == 0)
 561       return false;
 562     Addr.setReg(ResultReg);
 563     Addr.setOffset(0);
 564   }
 565   return true;
 566 }
 567
 568 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 569                                            const MachineInstrBuilder &MIB,
 570                                            unsigned Flags, bool UseUnscaled) {
 571   int64_t Offset = Addr.getOffset();
 572   // Frame base works a bit differently. Handle it separately.
 573   if (Addr.getKind() == Address::FrameIndexBase) {
 574     int FI = Addr.getFI();
 575     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 576     // and alignment should be based on the VT.
 577     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
 578         MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 579         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 580     // Now add the rest of the operands.
 581     MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
 582   } else {
 583     // Now add the rest of the operands.
 584     MIB.addReg(Addr.getReg());
 585     MIB.addImm(Offset);
 586   }
 587 }
 588
 589 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 590                                bool UseUnscaled) {
 591   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 592   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 593   if (!UseUnscaled && Addr.getOffset() < 0)
 594     UseUnscaled = true;
 595
 596   unsigned Opc;
 597   const TargetRegisterClass *RC;
 598   bool VTIsi1 = false;
 599   int64_t ScaleFactor = 0;
 600   switch (VT.SimpleTy) {
 601   default:
 602     return false;
 603   case MVT::i1:
 604     VTIsi1 = true;
 605   // Intentional fall-through.
 606   case MVT::i8:
 607     Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
 608     RC = &AArch64::GPR32RegClass;
 609     ScaleFactor = 1;
 610     break;
 611   case MVT::i16:
 612     Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
 613     RC = &AArch64::GPR32RegClass;
 614     ScaleFactor = 2;
 615     break;
 616   case MVT::i32:
 617     Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
 618     RC = &AArch64::GPR32RegClass;
 619     ScaleFactor = 4;
 620     break;
 621   case MVT::i64:
 622     Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
 623     RC = &AArch64::GPR64RegClass;
 624     ScaleFactor = 8;
 625     break;
 626   case MVT::f32:
 627     Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
 628     RC = TLI.getRegClassFor(VT);
 629     ScaleFactor = 4;
 630     break;
 631   case MVT::f64:
 632     Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
 633     RC = TLI.getRegClassFor(VT);
 634     ScaleFactor = 8;
 635     break;
 636   }
 637   // Scale the offset.
 638   if (!UseUnscaled) {
 639     int64_t Offset = Addr.getOffset();
 640     if (Offset & (ScaleFactor - 1))
 641       // Retry using an unscaled, 9-bit, signed immediate offset.
 642       return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
 643
 644     Addr.setOffset(Offset / ScaleFactor);
 645   }
 646
 647   // Simplify this down to something we can handle.
 648   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 649     return false;
 650
 651   // Create the base instruction, then add the operands.
 652   ResultReg = createResultReg(RC);
 653   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 654                                     TII.get(Opc), ResultReg);
 655   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
 656
 657   // Loading an i1 requires special handling.
 658   if (VTIsi1) {
 659     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
 660     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 661     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 662             ANDReg)
 663         .addReg(ResultReg)
 664         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 665     ResultReg = ANDReg;
 666   }
 667   return true;
 668 }
 669
 670 bool AArch64FastISel::SelectLoad(const Instruction *I) {
 671   MVT VT;
 672   // Verify we have a legal type before going any further.  Currently, we handle
 673   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 674   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 675   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
 676     return false;
 677
 678   // See if we can handle this address.
 679   Address Addr;
 680   if (!ComputeAddress(I->getOperand(0), Addr))
 681     return false;
 682
 683   unsigned ResultReg;
 684   if (!EmitLoad(VT, ResultReg, Addr))
 685     return false;
 686
 687   UpdateValueMap(I, ResultReg);
 688   return true;
 689 }
 690
 691 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 692                                 bool UseUnscaled) {
 693   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
 694   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
 695   if (!UseUnscaled && Addr.getOffset() < 0)
 696     UseUnscaled = true;
 697
 698   unsigned StrOpc;
 699   bool VTIsi1 = false;
 700   int64_t ScaleFactor = 0;
 701   // Using scaled, 12-bit, unsigned immediate offsets.
 702   switch (VT.SimpleTy) {
 703   default:
 704     return false;
 705   case MVT::i1:
 706     VTIsi1 = true;
 707   case MVT::i8:
 708     StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
 709     ScaleFactor = 1;
 710     break;
 711   case MVT::i16:
 712     StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
 713     ScaleFactor = 2;
 714     break;
 715   case MVT::i32:
 716     StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
 717     ScaleFactor = 4;
 718     break;
 719   case MVT::i64:
 720     StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
 721     ScaleFactor = 8;
 722     break;
 723   case MVT::f32:
 724     StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
 725     ScaleFactor = 4;
 726     break;
 727   case MVT::f64:
 728     StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
 729     ScaleFactor = 8;
 730     break;
 731   }
 732   // Scale the offset.
 733   if (!UseUnscaled) {
 734     int64_t Offset = Addr.getOffset();
 735     if (Offset & (ScaleFactor - 1))
 736       // Retry using an unscaled, 9-bit, signed immediate offset.
 737       return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
 738
 739     Addr.setOffset(Offset / ScaleFactor);
 740   }
 741
 742   // Simplify this down to something we can handle.
 743   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
 744     return false;
 745
 746   // Storing an i1 requires special handling.
 747   if (VTIsi1) {
 748     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
 749     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 750     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
 751             ANDReg)
 752         .addReg(SrcReg)
 753         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 754     SrcReg = ANDReg;
 755   }
 756   // Create the base instruction, then add the operands.
 757   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 758                                     TII.get(StrOpc)).addReg(SrcReg);
 759   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
 760   return true;
 761 }
 762
 763 bool AArch64FastISel::SelectStore(const Instruction *I) {
 764   MVT VT;
 765   Value *Op0 = I->getOperand(0);
 766   // Verify we have a legal type before going any further.  Currently, we handle
 767   // simple types that will directly fit in a register (i32/f32/i64/f64) or
 768   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
 769   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
 770       cast<StoreInst>(I)->isAtomic())
 771     return false;
 772
 773   // Get the value to be stored into a register.
 774   unsigned SrcReg = getRegForValue(Op0);
 775   if (SrcReg == 0)
 776     return false;
 777
 778   // See if we can handle this address.
 779   Address Addr;
 780   if (!ComputeAddress(I->getOperand(1), Addr))
 781     return false;
 782
 783   if (!EmitStore(VT, SrcReg, Addr))
 784     return false;
 785   return true;
 786 }
 787
 788 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 789   switch (Pred) {
 790   case CmpInst::FCMP_ONE:
 791   case CmpInst::FCMP_UEQ:
 792   default:
 793     // AL is our "false" for now. The other two need more compares.
 794     return AArch64CC::AL;
 795   case CmpInst::ICMP_EQ:
 796   case CmpInst::FCMP_OEQ:
 797     return AArch64CC::EQ;
 798   case CmpInst::ICMP_SGT:
 799   case CmpInst::FCMP_OGT:
 800     return AArch64CC::GT;
 801   case CmpInst::ICMP_SGE:
 802   case CmpInst::FCMP_OGE:
 803     return AArch64CC::GE;
 804   case CmpInst::ICMP_UGT:
 805   case CmpInst::FCMP_UGT:
 806     return AArch64CC::HI;
 807   case CmpInst::FCMP_OLT:
 808     return AArch64CC::MI;
 809   case CmpInst::ICMP_ULE:
 810   case CmpInst::FCMP_OLE:
 811     return AArch64CC::LS;
 812   case CmpInst::FCMP_ORD:
 813     return AArch64CC::VC;
 814   case CmpInst::FCMP_UNO:
 815     return AArch64CC::VS;
 816   case CmpInst::FCMP_UGE:
 817     return AArch64CC::PL;
 818   case CmpInst::ICMP_SLT:
 819   case CmpInst::FCMP_ULT:
 820     return AArch64CC::LT;
 821   case CmpInst::ICMP_SLE:
 822   case CmpInst::FCMP_ULE:
 823     return AArch64CC::LE;
 824   case CmpInst::FCMP_UNE:
 825   case CmpInst::ICMP_NE:
 826     return AArch64CC::NE;
 827   case CmpInst::ICMP_UGE:
 828     return AArch64CC::HS;
 829   case CmpInst::ICMP_ULT:
 830     return AArch64CC::LO;
 831   }
 832 }
 833
 834 bool AArch64FastISel::SelectBranch(const Instruction *I) {
 835   const BranchInst *BI = cast<BranchInst>(I);
 836   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
 837   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 838
 839   AArch64CC::CondCode CC = AArch64CC::NE;
 840   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
 841     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
 842       // We may not handle every CC for now.
 843       CC = getCompareCC(CI->getPredicate());
 844       if (CC == AArch64CC::AL)
 845         return false;
 846
 847       // Emit the cmp.
 848       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
 849         return false;
 850
 851       // Emit the branch.
 852       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 853           .addImm(CC)
 854           .addMBB(TBB);
 855
 856       // Obtain the branch weight and add the TrueBB to the successor list.
 857       uint32_t BranchWeight = 0;
 858       if (FuncInfo.BPI)
 859         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 860                                                   TBB->getBasicBlock());
 861       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 862
 863       FastEmitBranch(FBB, DbgLoc);
 864       return true;
 865     }
 866   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
 867     MVT SrcVT;
 868     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
 869         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
 870       unsigned CondReg = getRegForValue(TI->getOperand(0));
 871       if (CondReg == 0)
 872         return false;
 873
 874       // Issue an extract_subreg to get the lower 32-bits.
 875       if (SrcVT == MVT::i64)
 876         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
 877                                              AArch64::sub_32);
 878
 879       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
 880       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
 881       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 882               TII.get(AArch64::ANDWri), ANDReg)
 883           .addReg(CondReg)
 884           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 885       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 886               TII.get(AArch64::SUBSWri))
 887           .addReg(ANDReg)
 888           .addReg(ANDReg)
 889           .addImm(0)
 890           .addImm(0);
 891
 892       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 893         std::swap(TBB, FBB);
 894         CC = AArch64CC::EQ;
 895       }
 896       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 897           .addImm(CC)
 898           .addMBB(TBB);
 899
 900       // Obtain the branch weight and add the TrueBB to the successor list.
 901       uint32_t BranchWeight = 0;
 902       if (FuncInfo.BPI)
 903         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 904                                                   TBB->getBasicBlock());
 905       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 906
 907       FastEmitBranch(FBB, DbgLoc);
 908       return true;
 909     }
 910   } else if (const ConstantInt *CI =
 911                  dyn_cast<ConstantInt>(BI->getCondition())) {
 912     uint64_t Imm = CI->getZExtValue();
 913     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
 914     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
 915         .addMBB(Target);
 916
 917     // Obtain the branch weight and add the target to the successor list.
 918     uint32_t BranchWeight = 0;
 919     if (FuncInfo.BPI)
 920       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 921                                                  Target->getBasicBlock());
 922     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
 923     return true;
 924   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
 925     // Fake request the condition, otherwise the intrinsic might be completely
 926     // optimized away.
 927     unsigned CondReg = getRegForValue(BI->getCondition());
 928     if (!CondReg)
 929       return false;
 930
 931     // Emit the branch.
 932     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 933       .addImm(CC)
 934       .addMBB(TBB);
 935
 936     // Obtain the branch weight and add the TrueBB to the successor list.
 937     uint32_t BranchWeight = 0;
 938     if (FuncInfo.BPI)
 939       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 940                                                  TBB->getBasicBlock());
 941     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 942
 943     FastEmitBranch(FBB, DbgLoc);
 944     return true;
 945   }
 946
 947   unsigned CondReg = getRegForValue(BI->getCondition());
 948   if (CondReg == 0)
 949     return false;
 950
 951   // We've been divorced from our compare!  Our block was split, and
 952   // now our compare lives in a predecessor block.  We musn't
 953   // re-compare here, as the children of the compare aren't guaranteed
 954   // live across the block boundary (we *could* check for this).
 955   // Regardless, the compare has been done in the predecessor block,
 956   // and it left a value for us in a virtual register.  Ergo, we test
 957   // the one-bit value left in the virtual register.
 958   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
 959           AArch64::WZR)
 960       .addReg(CondReg)
 961       .addImm(0)
 962       .addImm(0);
 963
 964   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
 965     std::swap(TBB, FBB);
 966     CC = AArch64CC::EQ;
 967   }
 968
 969   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
 970       .addImm(CC)
 971       .addMBB(TBB);
 972
 973   // Obtain the branch weight and add the TrueBB to the successor list.
 974   uint32_t BranchWeight = 0;
 975   if (FuncInfo.BPI)
 976     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
 977                                                TBB->getBasicBlock());
 978   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
 979
 980   FastEmitBranch(FBB, DbgLoc);
 981   return true;
 982 }
 983
 984 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
 985   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
 986   unsigned AddrReg = getRegForValue(BI->getOperand(0));
 987   if (AddrReg == 0)
 988     return false;
 989
 990   // Emit the indirect branch.
 991   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
 992       .addReg(AddrReg);
 993
 994   // Make sure the CFG is up-to-date.
 995   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
 996     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
 997
 998   return true;
 999 }
1000
1001 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
1002   Type *Ty = Src1Value->getType();
1003   EVT SrcEVT = TLI.getValueType(Ty, true);
1004   if (!SrcEVT.isSimple())
1005     return false;
1006   MVT SrcVT = SrcEVT.getSimpleVT();
1007
1008   // Check to see if the 2nd operand is a constant that we can encode directly
1009   // in the compare.
1010   uint64_t Imm;
1011   bool UseImm = false;
1012   bool isNegativeImm = false;
1013   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1014     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
1015         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1016       const APInt &CIVal = ConstInt->getValue();
1017
1018       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
1019       if (CIVal.isNegative()) {
1020         isNegativeImm = true;
1021         Imm = -Imm;
1022       }
1023       // FIXME: We can handle more immediates using shifts.
1024       UseImm = ((Imm & 0xfff) == Imm);
1025     }
1026   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1027     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1028       if (ConstFP->isZero() && !ConstFP->isNegative())
1029         UseImm = true;
1030   }
1031
1032   unsigned ZReg;
1033   unsigned CmpOpc;
1034   bool isICmp = true;
1035   bool needsExt = false;
1036   switch (SrcVT.SimpleTy) {
1037   default:
1038     return false;
1039   case MVT::i1:
1040   case MVT::i8:
1041   case MVT::i16:
1042     needsExt = true;
1043   // Intentional fall-through.
1044   case MVT::i32:
1045     ZReg = AArch64::WZR;
1046     if (UseImm)
1047       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
1048     else
1049       CmpOpc = AArch64::SUBSWrr;
1050     break;
1051   case MVT::i64:
1052     ZReg = AArch64::XZR;
1053     if (UseImm)
1054       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
1055     else
1056       CmpOpc = AArch64::SUBSXrr;
1057     break;
1058   case MVT::f32:
1059     isICmp = false;
1060     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
1061     break;
1062   case MVT::f64:
1063     isICmp = false;
1064     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
1065     break;
1066   }
1067
1068   unsigned SrcReg1 = getRegForValue(Src1Value);
1069   if (SrcReg1 == 0)
1070     return false;
1071
1072   unsigned SrcReg2;
1073   if (!UseImm) {
1074     SrcReg2 = getRegForValue(Src2Value);
1075     if (SrcReg2 == 0)
1076       return false;
1077   }
1078
1079   // We have i1, i8, or i16, we need to either zero extend or sign extend.
1080   if (needsExt) {
1081     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1082     if (SrcReg1 == 0)
1083       return false;
1084     if (!UseImm) {
1085       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1086       if (SrcReg2 == 0)
1087         return false;
1088     }
1089   }
1090
1091   if (isICmp) {
1092     if (UseImm)
1093       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1094           .addReg(ZReg)
1095           .addReg(SrcReg1)
1096           .addImm(Imm)
1097           .addImm(0);
1098     else
1099       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1100           .addReg(ZReg)
1101           .addReg(SrcReg1)
1102           .addReg(SrcReg2);
1103   } else {
1104     if (UseImm)
1105       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1106           .addReg(SrcReg1);
1107     else
1108       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1109           .addReg(SrcReg1)
1110           .addReg(SrcReg2);
1111   }
1112   return true;
1113 }
1114
1115 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1116   const CmpInst *CI = cast<CmpInst>(I);
1117
1118   // We may not handle every CC for now.
1119   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1120   if (CC == AArch64CC::AL)
1121     return false;
1122
1123   // Emit the cmp.
1124   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1125     return false;
1126
1127   // Now set a register based on the comparison.
1128   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1129   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1130   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1131           ResultReg)
1132       .addReg(AArch64::WZR)
1133       .addReg(AArch64::WZR)
1134       .addImm(invertedCC);
1135
1136   UpdateValueMap(I, ResultReg);
1137   return true;
1138 }
1139
1140 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1141   const SelectInst *SI = cast<SelectInst>(I);
1142
1143   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1144   if (!DestEVT.isSimple())
1145     return false;
1146
1147   MVT DestVT = DestEVT.getSimpleVT();
1148   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1149       DestVT != MVT::f64)
1150     return false;
1151
1152   unsigned SelectOpc;
1153   switch (DestVT.SimpleTy) {
1154   default: return false;
1155   case MVT::i32: SelectOpc = AArch64::CSELWr;    break;
1156   case MVT::i64: SelectOpc = AArch64::CSELXr;    break;
1157   case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
1158   case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
1159   }
1160
1161   const Value *Cond = SI->getCondition();
1162   bool NeedTest = true;
1163   AArch64CC::CondCode CC = AArch64CC::NE;
1164   if (foldXALUIntrinsic(CC, I, Cond))
1165     NeedTest = false;
1166
1167   unsigned CondReg = getRegForValue(Cond);
1168   if (!CondReg)
1169     return false;
1170   bool CondIsKill = hasTrivialKill(Cond);
1171
1172   if (NeedTest) {
1173     MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1174     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1175     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1176             ANDReg)
1177       .addReg(CondReg, getKillRegState(CondIsKill))
1178       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1179
1180     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1181       .addReg(ANDReg)
1182       .addReg(ANDReg)
1183       .addImm(0)
1184       .addImm(0);
1185   }
1186
1187   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1188   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1189
1190   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1191   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1192
1193   if (!TrueReg || !FalseReg)
1194     return false;
1195
1196   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1197   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1198           ResultReg)
1199     .addReg(TrueReg, getKillRegState(TrueIsKill))
1200     .addReg(FalseReg, getKillRegState(FalseIsKill))
1201     .addImm(CC);
1202
1203   UpdateValueMap(I, ResultReg);
1204   return true;
1205 }
1206
1207 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1208   Value *V = I->getOperand(0);
1209   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1210     return false;
1211
1212   unsigned Op = getRegForValue(V);
1213   if (Op == 0)
1214     return false;
1215
1216   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1217   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1218           ResultReg).addReg(Op);
1219   UpdateValueMap(I, ResultReg);
1220   return true;
1221 }
1222
1223 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1224   Value *V = I->getOperand(0);
1225   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1226     return false;
1227
1228   unsigned Op = getRegForValue(V);
1229   if (Op == 0)
1230     return false;
1231
1232   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1233   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1234           ResultReg).addReg(Op);
1235   UpdateValueMap(I, ResultReg);
1236   return true;
1237 }
1238
1239 // FPToUI and FPToSI
1240 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1241   MVT DestVT;
1242   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1243     return false;
1244
1245   unsigned SrcReg = getRegForValue(I->getOperand(0));
1246   if (SrcReg == 0)
1247     return false;
1248
1249   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1250   if (SrcVT == MVT::f128)
1251     return false;
1252
1253   unsigned Opc;
1254   if (SrcVT == MVT::f64) {
1255     if (Signed)
1256       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1257     else
1258       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1259   } else {
1260     if (Signed)
1261       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1262     else
1263       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1264   }
1265   unsigned ResultReg = createResultReg(
1266       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1267   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1268       .addReg(SrcReg);
1269   UpdateValueMap(I, ResultReg);
1270   return true;
1271 }
1272
1273 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1274   MVT DestVT;
1275   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1276     return false;
1277   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1278           "Unexpected value type.");
1279
1280   unsigned SrcReg = getRegForValue(I->getOperand(0));
1281   if (SrcReg == 0)
1282     return false;
1283
1284   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1285
1286   // Handle sign-extension.
1287   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1288     SrcReg =
1289         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1290     if (SrcReg == 0)
1291       return false;
1292   }
1293
1294   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1295                                                   : &AArch64::GPR32RegClass);
1296
1297   unsigned Opc;
1298   if (SrcVT == MVT::i64) {
1299     if (Signed)
1300       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1301     else
1302       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1303   } else {
1304     if (Signed)
1305       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1306     else
1307       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1308   }
1309
1310   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1311   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1312       .addReg(SrcReg);
1313   UpdateValueMap(I, ResultReg);
1314   return true;
1315 }
1316
/// Lower the incoming formal arguments of the current function.
///
/// Only the simple case is handled: a non-vararg function using the C calling
/// convention whose arguments are all scalar i1/i8/i16/i32/i64/f16/f32/f64
/// values without byval/inreg/sret/nest attributes, with at most eight
/// integer and eight floating-point arguments. Each used argument is made a
/// live-in of its physical register and copied into a fresh virtual register.
///
/// \returns true if all arguments were lowered, false if the function does not
/// fit the constraints above.
bool AArch64FastISel::FastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  // Only handle simple cases like i1/i8/i16/i32/i64/f16/f32/f64 of up to 8 GPR
  // and FPR each. This first pass only validates; nothing is emitted yet.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  unsigned Idx = 0;
  for (auto const &Arg : F->args()) {
    // The first argument is at index 1.
    ++Idx;
    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
      return false;

    // Aggregates and vectors are not handled here.
    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    // Count how many integer and floating-point registers are required.
    EVT ArgVT = TLI.getValueType(ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      ++GPRCnt;
      break;
    case MVT::f16:
    case MVT::f32:
    case MVT::f64:
      ++FPRCnt;
      break;
    }

    // The register table below has eight entries per row.
    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  // Argument registers, one row per register width:
  // row 0 = W (i32 and narrower), 1 = X (i64), 2 = H (f16), 3 = S (f32),
  // 4 = D (f64).
  static const MCPhysReg Registers[5][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 }
  };

  // Second pass: assign registers in argument order. The integer rows share
  // GPRIdx and the floating-point rows share FPRIdx.
  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(Arg.getType());
    unsigned SrcReg;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16: VT = MVT::i32; // fall-through; narrow ints use a W register
    case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
    case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
    case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
    case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
    case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
    }

    // Skip unused arguments. The register index above has already been
    // advanced, so later arguments still land in the right register.
    if (Arg.use_empty()) {
      UpdateValueMap(&Arg, 0);
      continue;
    }

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(DstReg, getKillRegState(true));
    UpdateValueMap(&Arg, ResultReg);
  }
  return true;
}
1418
/// Lower the outgoing arguments of a call.
///
/// Runs calling-convention analysis over \p OutVTs, emits the call-frame setup
/// instruction, and then for every assigned location extends the value if the
/// convention asks for it and either copies it into its physical register
/// (recorded in CLI.OutRegs) or stores it to its stack slot relative to SP.
///
/// \param CLI      call being lowered; OutVals/OutFlags are read, OutRegs is
///                 appended to.
/// \param OutVTs   value type of each outgoing argument, indexed like
///                 CLI.OutVals.
/// \param NumBytes [out] stack size reported by the calling-convention
///                 analysis.
/// \returns true on success, false if an argument could not be handled.
bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    .addImm(NumBytes);

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      // Value already has the location's type; nothing to do.
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
    // Intentional fall-through: any-extend is implemented as a zero-extend.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      // Register argument: copy into the physical register and remember it so
      // the caller can add it as an implicit use of the call.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      // On big-endian targets an argument smaller than 8 bytes is stored at
      // an offset of 8 - ArgSize from its location.
      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      // Address the slot as SP + location offset (+ big-endian adjustment).
      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      if (!EmitStore(ArgVT, ArgReg, Addr))
        return false;
    }
  }
  return true;
}
1504
1505 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1506                                  unsigned NumBytes) {
1507   CallingConv::ID CC = CLI.CallConv;
1508
1509   // Issue CALLSEQ_END
1510   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1511   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1512     .addImm(NumBytes).addImm(0);
1513
1514   // Now the return value.
1515   if (RetVT != MVT::isVoid) {
1516     SmallVector<CCValAssign, 16> RVLocs;
1517     CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
1518     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1519
1520     // Only handle a single return value.
1521     if (RVLocs.size() != 1)
1522       return false;
1523
1524     // Copy all of the result registers out of their specified physreg.
1525     MVT CopyVT = RVLocs[0].getValVT();
1526     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1527     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1528             TII.get(TargetOpcode::COPY), ResultReg)
1529       .addReg(RVLocs[0].getLocReg());
1530     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1531
1532     CLI.ResultReg = ResultReg;
1533     CLI.NumResultRegs = 1;
1534   }
1535
1536   return true;
1537 }
1538
/// Lower a call described by \p CLI.
///
/// Supported: non-vararg calls under the small code model, or the large code
/// model on MachO, with a void or legal return type, and arguments that are
/// non-vector, at most 64 bits wide, and carry no inreg/sret/nest/byval
/// flags. Under the small code model the call is a BL to the symbol/global or
/// a BLR through a register; under the large code model the callee address is
/// always placed in a register first and called with BLR.
///
/// \returns true if the call was lowered, false if it is not handled here.
bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC  = CLI.CallConv;
  bool IsVarArg       = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const char *SymName = CLI.SymName;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small and large code model.
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  // Reject argument attributes that are not handled by ProcessCallArgs.
  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    // Besides legal types, i1/i8/i16 are accepted here.
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  // Work out how the callee is addressed before emitting anything.
  Address Addr;
  if (!ComputeCallAddress(Callee, Addr))
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CM == CodeModel::Small) {
    // A callee held in a register uses BLR, otherwise BL. The operand itself
    // is chosen in the order: external symbol, global value, register.
    unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
    if (SymName)
      MIB.addExternalSymbol(SymName, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg())
      MIB.addReg(Addr.getReg());
    else
      return false;
  } else {
    // Large code model: get the callee address into CallReg, then BLR.
    unsigned CallReg = 0;
    if (SymName) {
      // Load the address of the external symbol from the GOT: ADRP for the
      // page, then LDR with the page offset.
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
              ADRPReg)
        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
              CallReg)
        .addReg(ADRPReg)
        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                           AArch64II::MO_NC);
    } else if (Addr.getGlobalValue()) {
      CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
    } else if (Addr.getReg())
      CallReg = Addr.getReg();

    // No usable callee address.
    if (!CallReg)
      return false;

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(AArch64::BLR)).addReg(CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return FinishCall(CLI, RetVT, NumBytes);
}
1647
1648 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1649   if (Alignment)
1650     return Len / Alignment <= 4;
1651   else
1652     return Len < 32;
1653 }
1654
1655 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1656                                          uint64_t Len, unsigned Alignment) {
1657   // Make sure we don't bloat code by inlining very large memcpy's.
1658   if (!IsMemCpySmall(Len, Alignment))
1659     return false;
1660
1661   int64_t UnscaledOffset = 0;
1662   Address OrigDest = Dest;
1663   Address OrigSrc = Src;
1664
1665   while (Len) {
1666     MVT VT;
1667     if (!Alignment || Alignment >= 8) {
1668       if (Len >= 8)
1669         VT = MVT::i64;
1670       else if (Len >= 4)
1671         VT = MVT::i32;
1672       else if (Len >= 2)
1673         VT = MVT::i16;
1674       else {
1675         VT = MVT::i8;
1676       }
1677     } else {
1678       // Bound based on alignment.
1679       if (Len >= 4 && Alignment == 4)
1680         VT = MVT::i32;
1681       else if (Len >= 2 && Alignment == 2)
1682         VT = MVT::i16;
1683       else {
1684         VT = MVT::i8;
1685       }
1686     }
1687
1688     bool RV;
1689     unsigned ResultReg;
1690     RV = EmitLoad(VT, ResultReg, Src);
1691     if (!RV)
1692       return false;
1693
1694     RV = EmitStore(VT, ResultReg, Dest);
1695     if (!RV)
1696       return false;
1697
1698     int64_t Size = VT.getSizeInBits() / 8;
1699     Len -= Size;
1700     UnscaledOffset += Size;
1701
1702     // We need to recompute the unscaled offset for each iteration.
1703     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1704     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1705   }
1706
1707   return true;
1708 }
1709
1710 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
1711 /// into the user. The condition code will only be updated on success.
1712 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
1713                                         const Instruction *I,
1714                                         const Value *Cond) {
1715   if (!isa<ExtractValueInst>(Cond))
1716     return false;
1717
1718   const auto *EV = cast<ExtractValueInst>(Cond);
1719   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
1720     return false;
1721
1722   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
1723   MVT RetVT;
1724   const Function *Callee = II->getCalledFunction();
1725   Type *RetTy =
1726   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
1727   if (!isTypeLegal(RetTy, RetVT))
1728     return false;
1729
1730   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1731     return false;
1732
1733   AArch64CC::CondCode TmpCC;
1734   switch (II->getIntrinsicID()) {
1735     default: return false;
1736     case Intrinsic::sadd_with_overflow:
1737     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
1738     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
1739     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
1740     case Intrinsic::smul_with_overflow:
1741     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
1742   }
1743
1744   // Check if both instructions are in the same basic block.
1745   if (II->getParent() != I->getParent())
1746     return false;
1747
1748   // Make sure nothing is in the way
1749   BasicBlock::const_iterator Start = I;
1750   BasicBlock::const_iterator End = II;
1751   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
1752     // We only expect extractvalue instructions between the intrinsic and the
1753     // instruction to be selected.
1754     if (!isa<ExtractValueInst>(Itr))
1755       return false;
1756
1757     // Check that the extractvalue operand comes from the intrinsic.
1758     const auto *EVI = cast<ExtractValueInst>(Itr);
1759     if (EVI->getAggregateOperand() != II)
1760       return false;
1761   }
1762
1763   CC = TmpCC;
1764   return true;
1765 }
1766
1767 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
1768   // FIXME: Handle more intrinsics.
1769   switch (II->getIntrinsicID()) {
1770   default: return false;
1771   case Intrinsic::frameaddress: {
1772     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
1773     MFI->setFrameAddressIsTaken(true);
1774
1775     const AArch64RegisterInfo *RegInfo =
1776         static_cast<const AArch64RegisterInfo *>(
1777             TM.getSubtargetImpl()->getRegisterInfo());
1778     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
1779     unsigned SrcReg = FramePtr;
1780
1781     // Recursively load frame address
1782     // ldr x0, [fp]
1783     // ldr x0, [x0]
1784     // ldr x0, [x0]
1785     // ...
1786     unsigned DestReg;
1787     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
1788     while (Depth--) {
1789       DestReg = createResultReg(&AArch64::GPR64RegClass);
1790       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1791               TII.get(AArch64::LDRXui), DestReg)
1792         .addReg(SrcReg).addImm(0);
1793       SrcReg = DestReg;
1794     }
1795
1796     UpdateValueMap(II, SrcReg);
1797     return true;
1798   }
1799   case Intrinsic::memcpy:
1800   case Intrinsic::memmove: {
1801     const auto *MTI = cast<MemTransferInst>(II);
1802     // Don't handle volatile.
1803     if (MTI->isVolatile())
1804       return false;
1805
1806     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
1807     // we would emit dead code because we don't currently handle memmoves.
1808     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
1809     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
1810       // Small memcpy's are common enough that we want to do them without a call
1811       // if possible.
1812       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
1813       unsigned Alignment = MTI->getAlignment();
1814       if (IsMemCpySmall(Len, Alignment)) {
1815         Address Dest, Src;
1816         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
1817             !ComputeAddress(MTI->getRawSource(), Src))
1818           return false;
1819         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1820           return true;
1821       }
1822     }
1823
1824     if (!MTI->getLength()->getType()->isIntegerTy(64))
1825       return false;
1826
1827     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
1828       // Fast instruction selection doesn't support the special
1829       // address spaces.
1830       return false;
1831
1832     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
1833     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
1834   }
1835   case Intrinsic::memset: {
1836     const MemSetInst *MSI = cast<MemSetInst>(II);
1837     // Don't handle volatile.
1838     if (MSI->isVolatile())
1839       return false;
1840
1841     if (!MSI->getLength()->getType()->isIntegerTy(64))
1842       return false;
1843
1844     if (MSI->getDestAddressSpace() > 255)
1845       // Fast instruction selection doesn't support the special
1846       // address spaces.
1847       return false;
1848
1849     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
1850   }
1851   case Intrinsic::trap: {
1852     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
1853         .addImm(1);
1854     return true;
1855   }
1856   case Intrinsic::sqrt: {
1857     Type *RetTy = II->getCalledFunction()->getReturnType();
1858
1859     MVT VT;
1860     if (!isTypeLegal(RetTy, VT))
1861       return false;
1862
1863     unsigned Op0Reg = getRegForValue(II->getOperand(0));
1864     if (!Op0Reg)
1865       return false;
1866     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
1867
1868     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
1869     if (!ResultReg)
1870       return false;
1871
1872     UpdateValueMap(II, ResultReg);
1873     return true;
1874   }
1875   case Intrinsic::sadd_with_overflow:
1876   case Intrinsic::uadd_with_overflow:
1877   case Intrinsic::ssub_with_overflow:
1878   case Intrinsic::usub_with_overflow:
1879   case Intrinsic::smul_with_overflow:
1880   case Intrinsic::umul_with_overflow: {
1881     // This implements the basic lowering of the xalu with overflow intrinsics.
1882     const Function *Callee = II->getCalledFunction();
1883     auto *Ty = cast<StructType>(Callee->getReturnType());
1884     Type *RetTy = Ty->getTypeAtIndex(0U);
1885     Type *CondTy = Ty->getTypeAtIndex(1);
1886
1887     MVT VT;
1888     if (!isTypeLegal(RetTy, VT))
1889       return false;
1890
1891     if (VT != MVT::i32 && VT != MVT::i64)
1892       return false;
1893
1894     const Value *LHS = II->getArgOperand(0);
1895     const Value *RHS = II->getArgOperand(1);
1896     // Canonicalize immediate to the RHS.
1897     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
1898         isCommutativeIntrinsic(II))
1899       std::swap(LHS, RHS);
1900
1901     unsigned LHSReg = getRegForValue(LHS);
1902     if (!LHSReg)
1903       return false;
1904     bool LHSIsKill = hasTrivialKill(LHS);
1905
1906     // Check if the immediate can be encoded in the instruction and if we should
1907     // invert the instruction (adds -> subs) to handle negative immediates.
1908     bool UseImm = false;
1909     bool UseInverse = false;
1910     uint64_t Imm = 0;
1911     if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1912       if (C->isNegative()) {
1913         UseInverse = true;
1914         Imm = -(C->getSExtValue());
1915       } else
1916         Imm = C->getZExtValue();
1917
1918       if (isUInt<12>(Imm))
1919         UseImm = true;
1920
1921       UseInverse = UseImm && UseInverse;
1922     }
1923
1924     static const unsigned OpcTable[2][2][2] = {
1925       { {AArch64::ADDSWrr, AArch64::ADDSXrr},
1926         {AArch64::ADDSWri, AArch64::ADDSXri} },
1927       { {AArch64::SUBSWrr, AArch64::SUBSXrr},
1928         {AArch64::SUBSWri, AArch64::SUBSXri} }
1929     };
1930     unsigned Opc = 0;
1931     unsigned MulReg = 0;
1932     unsigned RHSReg = 0;
1933     bool RHSIsKill = false;
1934     AArch64CC::CondCode CC = AArch64CC::Invalid;
1935     bool Is64Bit = VT == MVT::i64;
1936     switch (II->getIntrinsicID()) {
1937     default: llvm_unreachable("Unexpected intrinsic!");
1938     case Intrinsic::sadd_with_overflow:
1939       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1940     case Intrinsic::uadd_with_overflow:
1941       Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
1942     case Intrinsic::ssub_with_overflow:
1943       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
1944     case Intrinsic::usub_with_overflow:
1945       Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
1946     case Intrinsic::smul_with_overflow: {
1947       CC = AArch64CC::NE;
1948       RHSReg = getRegForValue(RHS);
1949       if (!RHSReg)
1950         return false;
1951       RHSIsKill = hasTrivialKill(RHS);
1952
1953       if (VT == MVT::i32) {
1954         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1955         unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
1956         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1957                                             AArch64::sub_32);
1958         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
1959                                               AArch64::sub_32);
1960         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1961         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1962                 TII.get(AArch64::SUBSWrs), CmpReg)
1963           .addReg(ShiftReg, getKillRegState(true))
1964           .addReg(MulReg, getKillRegState(false))
1965           .addImm(159); // 159 <-> asr #31
1966       } else {
1967         assert(VT == MVT::i64 && "Unexpected value type.");
1968         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1969         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
1970                                         RHSReg, RHSIsKill);
1971         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
1972         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1973                 TII.get(AArch64::SUBSXrs), CmpReg)
1974           .addReg(SMULHReg, getKillRegState(true))
1975           .addReg(MulReg, getKillRegState(false))
1976           .addImm(191); // 191 <-> asr #63
1977       }
1978       break;
1979     }
1980     case Intrinsic::umul_with_overflow: {
1981       CC = AArch64CC::NE;
1982       RHSReg = getRegForValue(RHS);
1983       if (!RHSReg)
1984         return false;
1985       RHSIsKill = hasTrivialKill(RHS);
1986
1987       if (VT == MVT::i32) {
1988         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1989         unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
1990         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1991                 TII.get(AArch64::SUBSXrs), CmpReg)
1992           .addReg(AArch64::XZR, getKillRegState(true))
1993           .addReg(MulReg, getKillRegState(false))
1994           .addImm(96); // 96 <-> lsr #32
1995         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
1996                                             AArch64::sub_32);
1997       } else {
1998         assert(VT == MVT::i64 && "Unexpected value type.");
1999         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2000         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2001                                         RHSReg, RHSIsKill);
2002         unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
2003         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2004                 TII.get(AArch64::SUBSXrr), CmpReg)
2005         .addReg(AArch64::XZR, getKillRegState(true))
2006         .addReg(UMULHReg, getKillRegState(false));
2007       }
2008       break;
2009     }
2010     }
2011
2012     if (!UseImm) {
2013       RHSReg = getRegForValue(RHS);
2014       if (!RHSReg)
2015         return false;
2016       RHSIsKill = hasTrivialKill(RHS);
2017     }
2018
2019     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2020     if (Opc) {
2021       MachineInstrBuilder MIB;
2022       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2023                     ResultReg)
2024               .addReg(LHSReg, getKillRegState(LHSIsKill));
2025       if (UseImm) {
2026         MIB.addImm(Imm);
2027         MIB.addImm(0);
2028       } else
2029         MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
2030     }
2031     else
2032       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2033               TII.get(TargetOpcode::COPY), ResultReg)
2034         .addReg(MulReg);
2035
2036     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
2037     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2038     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2039             ResultReg2)
2040       .addReg(AArch64::WZR, getKillRegState(true))
2041       .addReg(AArch64::WZR, getKillRegState(true))
2042       .addImm(getInvertedCondCode(CC));
2043
2044     UpdateValueMap(II, ResultReg, 2);
2045     return true;
2046   }
2047   }
2048   return false;
2049 }
2050
2051 bool AArch64FastISel::SelectRet(const Instruction *I) {
2052   const ReturnInst *Ret = cast<ReturnInst>(I);
2053   const Function &F = *I->getParent()->getParent();
2054
2055   if (!FuncInfo.CanLowerReturn)
2056     return false;
2057
2058   if (F.isVarArg())
2059     return false;
2060
2061   // Build a list of return value registers.
2062   SmallVector<unsigned, 4> RetRegs;
2063
2064   if (Ret->getNumOperands() > 0) {
2065     CallingConv::ID CC = F.getCallingConv();
2066     SmallVector<ISD::OutputArg, 4> Outs;
2067     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2068
2069     // Analyze operands of the call, assigning locations to each operand.
2070     SmallVector<CCValAssign, 16> ValLocs;
2071     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
2072                    I->getContext());
2073     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2074                                                      : RetCC_AArch64_AAPCS;
2075     CCInfo.AnalyzeReturn(Outs, RetCC);
2076
2077     // Only handle a single return value for now.
2078     if (ValLocs.size() != 1)
2079       return false;
2080
2081     CCValAssign &VA = ValLocs[0];
2082     const Value *RV = Ret->getOperand(0);
2083
2084     // Don't bother handling odd stuff for now.
2085     if (VA.getLocInfo() != CCValAssign::Full)
2086       return false;
2087     // Only handle register returns for now.
2088     if (!VA.isRegLoc())
2089       return false;
2090     unsigned Reg = getRegForValue(RV);
2091     if (Reg == 0)
2092       return false;
2093
2094     unsigned SrcReg = Reg + VA.getValNo();
2095     unsigned DestReg = VA.getLocReg();
2096     // Avoid a cross-class copy. This is very unlikely.
2097     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2098       return false;
2099
2100     EVT RVEVT = TLI.getValueType(RV->getType());
2101     if (!RVEVT.isSimple())
2102       return false;
2103
2104     // Vectors (of > 1 lane) in big endian need tricky handling.
2105     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2106       return false;
2107
2108     MVT RVVT = RVEVT.getSimpleVT();
2109     if (RVVT == MVT::f128)
2110       return false;
2111     MVT DestVT = VA.getValVT();
2112     // Special handling for extended integers.
2113     if (RVVT != DestVT) {
2114       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2115         return false;
2116
2117       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2118         return false;
2119
2120       bool isZExt = Outs[0].Flags.isZExt();
2121       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2122       if (SrcReg == 0)
2123         return false;
2124     }
2125
2126     // Make the copy.
2127     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2128             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2129
2130     // Add register to return instruction.
2131     RetRegs.push_back(VA.getLocReg());
2132   }
2133
2134   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2135                                     TII.get(AArch64::RET_ReallyLR));
2136   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2137     MIB.addReg(RetRegs[i], RegState::Implicit);
2138   return true;
2139 }
2140
2141 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2142   Type *DestTy = I->getType();
2143   Value *Op = I->getOperand(0);
2144   Type *SrcTy = Op->getType();
2145
2146   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2147   EVT DestEVT = TLI.getValueType(DestTy, true);
2148   if (!SrcEVT.isSimple())
2149     return false;
2150   if (!DestEVT.isSimple())
2151     return false;
2152
2153   MVT SrcVT = SrcEVT.getSimpleVT();
2154   MVT DestVT = DestEVT.getSimpleVT();
2155
2156   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2157       SrcVT != MVT::i8)
2158     return false;
2159   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2160       DestVT != MVT::i1)
2161     return false;
2162
2163   unsigned SrcReg = getRegForValue(Op);
2164   if (!SrcReg)
2165     return false;
2166
2167   // If we're truncating from i64 to a smaller non-legal type then generate an
2168   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2169   // generate any code.
2170   if (SrcVT == MVT::i64) {
2171     uint64_t Mask = 0;
2172     switch (DestVT.SimpleTy) {
2173     default:
2174       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2175       return false;
2176     case MVT::i1:
2177       Mask = 0x1;
2178       break;
2179     case MVT::i8:
2180       Mask = 0xff;
2181       break;
2182     case MVT::i16:
2183       Mask = 0xffff;
2184       break;
2185     }
2186     // Issue an extract_subreg to get the lower 32-bits.
2187     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
2188                                                 AArch64::sub_32);
2189     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
2190     // Create the AND instruction which performs the actual truncation.
2191     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
2192     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2193             ANDReg)
2194         .addReg(Reg32)
2195         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
2196     SrcReg = ANDReg;
2197   }
2198
2199   UpdateValueMap(I, SrcReg);
2200   return true;
2201 }
2202
2203 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2204   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2205           DestVT == MVT::i64) &&
2206          "Unexpected value type.");
2207   // Handle i8 and i16 as i32.
2208   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2209     DestVT = MVT::i32;
2210
2211   if (isZExt) {
2212     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2213     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
2214     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
2215             ResultReg)
2216         .addReg(SrcReg)
2217         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2218
2219     if (DestVT == MVT::i64) {
2220       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2221       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2222       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2223       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2224               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2225           .addImm(0)
2226           .addReg(ResultReg)
2227           .addImm(AArch64::sub_32);
2228       ResultReg = Reg64;
2229     }
2230     return ResultReg;
2231   } else {
2232     if (DestVT == MVT::i64) {
2233       // FIXME: We're SExt i1 to i64.
2234       return 0;
2235     }
2236     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
2237     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
2238             ResultReg)
2239         .addReg(SrcReg)
2240         .addImm(0)
2241         .addImm(0);
2242     return ResultReg;
2243   }
2244 }
2245
2246 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2247                                       unsigned Op1, bool Op1IsKill) {
2248   unsigned Opc, ZReg;
2249   switch (RetVT.SimpleTy) {
2250   default: return 0;
2251   case MVT::i8:
2252   case MVT::i16:
2253   case MVT::i32:
2254     RetVT = MVT::i32;
2255     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2256   case MVT::i64:
2257     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2258   }
2259
2260   // Create the base instruction, then add the operands.
2261   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
2262   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2263     .addReg(Op0, getKillRegState(Op0IsKill))
2264     .addReg(Op1, getKillRegState(Op1IsKill))
2265     .addReg(ZReg, getKillRegState(true));
2266
2267   return ResultReg;
2268 }
2269
2270 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2271                                         unsigned Op1, bool Op1IsKill) {
2272   if (RetVT != MVT::i64)
2273     return 0;
2274
2275   // Create the base instruction, then add the operands.
2276   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2277   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
2278           ResultReg)
2279     .addReg(Op0, getKillRegState(Op0IsKill))
2280     .addReg(Op1, getKillRegState(Op1IsKill))
2281     .addReg(AArch64::XZR, getKillRegState(true));
2282
2283   return ResultReg;
2284 }
2285
2286 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2287                                         unsigned Op1, bool Op1IsKill) {
2288   if (RetVT != MVT::i64)
2289     return 0;
2290
2291   // Create the base instruction, then add the operands.
2292   unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
2293   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
2294           ResultReg)
2295     .addReg(Op0, getKillRegState(Op0IsKill))
2296     .addReg(Op1, getKillRegState(Op1IsKill))
2297     .addReg(AArch64::XZR, getKillRegState(true));
2298
2299   return ResultReg;
2300 }
2301
2302 unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2303                                       uint64_t Shift) {
2304   unsigned Opc, ImmR, ImmS;
2305   switch (RetVT.SimpleTy) {
2306   default: return 0;
2307   case MVT::i8:
2308     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS =  7 - Shift; break;
2309   case MVT::i16:
2310     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
2311   case MVT::i32:
2312     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2313   case MVT::i64:
2314     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2315   }
2316
2317   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2318   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
2319                           ImmS);
2320 }
2321
2322 unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2323                                       uint64_t Shift) {
2324   unsigned Opc, ImmS;
2325   switch (RetVT.SimpleTy) {
2326   default: return 0;
2327   case MVT::i8:  Opc = AArch64::UBFMWri; ImmS =  7; break;
2328   case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
2329   case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
2330   case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
2331   }
2332
2333   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2334   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2335                           ImmS);
2336 }
2337
2338 unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2339                                       uint64_t Shift) {
2340   unsigned Opc, ImmS;
2341   switch (RetVT.SimpleTy) {
2342   default: return 0;
2343   case MVT::i8:  Opc = AArch64::SBFMWri; ImmS =  7; break;
2344   case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
2345   case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
2346   case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
2347   }
2348
2349   RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
2350   return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
2351                           ImmS);
2352 }
2353
2354 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2355                                      bool isZExt) {
2356   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2357
2358   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2359   // DestVT are odd things, so test to make sure that they are both types we can
2360   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2361   // bail out to SelectionDAG.
2362   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2363        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2364       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2365        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2366     return 0;
2367
2368   unsigned Opc;
2369   unsigned Imm = 0;
2370
2371   switch (SrcVT.SimpleTy) {
2372   default:
2373     return 0;
2374   case MVT::i1:
2375     return Emiti1Ext(SrcReg, DestVT, isZExt);
2376   case MVT::i8:
2377     if (DestVT == MVT::i64)
2378       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2379     else
2380       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2381     Imm = 7;
2382     break;
2383   case MVT::i16:
2384     if (DestVT == MVT::i64)
2385       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2386     else
2387       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2388     Imm = 15;
2389     break;
2390   case MVT::i32:
2391     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2392     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2393     Imm = 31;
2394     break;
2395   }
2396
2397   // Handle i8 and i16 as i32.
2398   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2399     DestVT = MVT::i32;
2400   else if (DestVT == MVT::i64) {
2401     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2402     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2403             TII.get(AArch64::SUBREG_TO_REG), Src64)
2404         .addImm(0)
2405         .addReg(SrcReg)
2406         .addImm(AArch64::sub_32);
2407     SrcReg = Src64;
2408   }
2409
2410   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2411   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2412       .addReg(SrcReg)
2413       .addImm(0)
2414       .addImm(Imm);
2415
2416   return ResultReg;
2417 }
2418
2419 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2420   // On ARM, in general, integer casts don't involve legal types; this code
2421   // handles promotable integers.  The high bits for a type smaller than
2422   // the register size are assumed to be undefined.
2423   Type *DestTy = I->getType();
2424   Value *Src = I->getOperand(0);
2425   Type *SrcTy = Src->getType();
2426
2427   bool isZExt = isa<ZExtInst>(I);
2428   unsigned SrcReg = getRegForValue(Src);
2429   if (!SrcReg)
2430     return false;
2431
2432   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2433   EVT DestEVT = TLI.getValueType(DestTy, true);
2434   if (!SrcEVT.isSimple())
2435     return false;
2436   if (!DestEVT.isSimple())
2437     return false;
2438
2439   MVT SrcVT = SrcEVT.getSimpleVT();
2440   MVT DestVT = DestEVT.getSimpleVT();
2441   unsigned ResultReg = 0;
2442
2443   // Check if it is an argument and if it is already zero/sign-extended.
2444   if (const auto *Arg = dyn_cast<Argument>(Src)) {
2445     if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
2446       if (DestVT == MVT::i64) {
2447         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2448         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2449                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
2450           .addImm(0)
2451           .addReg(SrcReg)
2452           .addImm(AArch64::sub_32);
2453       } else
2454         ResultReg = SrcReg;
2455     }
2456   }
2457
2458   if (!ResultReg)
2459     ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2460
2461   if (!ResultReg)
2462     return false;
2463
2464   UpdateValueMap(I, ResultReg);
2465   return true;
2466 }
2467
2468 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2469   EVT DestEVT = TLI.getValueType(I->getType(), true);
2470   if (!DestEVT.isSimple())
2471     return false;
2472
2473   MVT DestVT = DestEVT.getSimpleVT();
2474   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2475     return false;
2476
2477   unsigned DivOpc;
2478   bool is64bit = (DestVT == MVT::i64);
2479   switch (ISDOpcode) {
2480   default:
2481     return false;
2482   case ISD::SREM:
2483     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2484     break;
2485   case ISD::UREM:
2486     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2487     break;
2488   }
2489   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2490   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2491   if (!Src0Reg)
2492     return false;
2493
2494   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2495   if (!Src1Reg)
2496     return false;
2497
2498   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
2499   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
2500       .addReg(Src0Reg)
2501       .addReg(Src1Reg);
2502   // The remainder is computed as numerator - (quotient * denominator) using the
2503   // MSUB instruction.
2504   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2505   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
2506       .addReg(QuotReg)
2507       .addReg(Src1Reg)
2508       .addReg(Src0Reg);
2509   UpdateValueMap(I, ResultReg);
2510   return true;
2511 }
2512
2513 bool AArch64FastISel::SelectMul(const Instruction *I) {
2514   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2515   if (!SrcEVT.isSimple())
2516     return false;
2517   MVT SrcVT = SrcEVT.getSimpleVT();
2518
2519   // Must be simple value type.  Don't handle vectors.
2520   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2521       SrcVT != MVT::i8)
2522     return false;
2523
2524   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2525   if (!Src0Reg)
2526     return false;
2527   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2528
2529   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2530   if (!Src1Reg)
2531     return false;
2532   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2533
2534   unsigned ResultReg =
2535     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2536
2537   if (!ResultReg)
2538     return false;
2539
2540   UpdateValueMap(I, ResultReg);
2541   return true;
2542 }
2543
2544 bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
2545                                   bool IsArithmetic) {
2546   EVT RetEVT = TLI.getValueType(I->getType(), true);
2547   if (!RetEVT.isSimple())
2548     return false;
2549   MVT RetVT = RetEVT.getSimpleVT();
2550
2551   if (!isa<ConstantInt>(I->getOperand(1)))
2552     return false;
2553
2554   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2555   if (!Op0Reg)
2556     return false;
2557   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2558
2559   uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
2560
2561   unsigned ResultReg;
2562   if (IsLeftShift)
2563     ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2564   else {
2565     if (IsArithmetic)
2566       ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2567     else
2568       ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2569   }
2570
2571   if (!ResultReg)
2572     return false;
2573
2574   UpdateValueMap(I, ResultReg);
2575   return true;
2576 }
2577
2578 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
2579   MVT RetVT, SrcVT;
2580
2581   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
2582     return false;
2583   if (!isTypeLegal(I->getType(), RetVT))
2584     return false;
2585
2586   unsigned Opc;
2587   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
2588     Opc = AArch64::FMOVWSr;
2589   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
2590     Opc = AArch64::FMOVXDr;
2591   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
2592     Opc = AArch64::FMOVSWr;
2593   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
2594     Opc = AArch64::FMOVDXr;
2595   else
2596     return false;
2597
2598   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2599   if (!Op0Reg)
2600     return false;
2601   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2602   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
2603                                       Op0Reg, Op0IsKill);
2604
2605   if (!ResultReg)
2606     return false;
2607
2608   UpdateValueMap(I, ResultReg);
2609   return true;
2610 }
2611
2612 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
2613   switch (I->getOpcode()) {
2614   default:
2615     break;
2616   case Instruction::Load:
2617     return SelectLoad(I);
2618   case Instruction::Store:
2619     return SelectStore(I);
2620   case Instruction::Br:
2621     return SelectBranch(I);
2622   case Instruction::IndirectBr:
2623     return SelectIndirectBr(I);
2624   case Instruction::FCmp:
2625   case Instruction::ICmp:
2626     return SelectCmp(I);
2627   case Instruction::Select:
2628     return SelectSelect(I);
2629   case Instruction::FPExt:
2630     return SelectFPExt(I);
2631   case Instruction::FPTrunc:
2632     return SelectFPTrunc(I);
2633   case Instruction::FPToSI:
2634     return SelectFPToInt(I, /*Signed=*/true);
2635   case Instruction::FPToUI:
2636     return SelectFPToInt(I, /*Signed=*/false);
2637   case Instruction::SIToFP:
2638     return SelectIntToFP(I, /*Signed=*/true);
2639   case Instruction::UIToFP:
2640     return SelectIntToFP(I, /*Signed=*/false);
2641   case Instruction::SRem:
2642     return SelectRem(I, ISD::SREM);
2643   case Instruction::URem:
2644     return SelectRem(I, ISD::UREM);
2645   case Instruction::Ret:
2646     return SelectRet(I);
2647   case Instruction::Trunc:
2648     return SelectTrunc(I);
2649   case Instruction::ZExt:
2650   case Instruction::SExt:
2651     return SelectIntExt(I);
2652
2653   // FIXME: All of these should really be handled by the target-independent
2654   // selector -> improve FastISel tblgen.
2655   case Instruction::Mul:
2656     return SelectMul(I);
2657   case Instruction::Shl:
2658       return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
2659   case Instruction::LShr:
2660     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
2661   case Instruction::AShr:
2662     return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
2663   case Instruction::BitCast:
2664     return SelectBitCast(I);
2665   }
2666   return false;
2667   // Silence warnings.
2668   (void)&CC_AArch64_DarwinPCS_VarArg;
2669 }
2670
2671 namespace llvm {
2672 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
2673                                         const TargetLibraryInfo *libInfo) {
2674   return new AArch64FastISel(funcInfo, libInfo);
2675 }
2676 }