lib/Target/ARM/ARMISelLowering.cpp

   1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that ARM uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "ARM.h"
  16 #include "ARMAddressingModes.h"
  17 #include "ARMConstantPoolValue.h"
  18 #include "ARMISelLowering.h"
  19 #include "ARMMachineFunctionInfo.h"
  20 #include "ARMPerfectShuffle.h"
  21 #include "ARMRegisterInfo.h"
  22 #include "ARMSubtarget.h"
  23 #include "ARMTargetMachine.h"
  24 #include "ARMTargetObjectFile.h"
  25 #include "llvm/CallingConv.h"
  26 #include "llvm/Constants.h"
  27 #include "llvm/Function.h"
  28 #include "llvm/GlobalValue.h"
  29 #include "llvm/Instruction.h"
  30 #include "llvm/Intrinsics.h"
  31 #include "llvm/Type.h"
  32 #include "llvm/CodeGen/CallingConvLower.h"
  33 #include "llvm/CodeGen/MachineBasicBlock.h"
  34 #include "llvm/CodeGen/MachineFrameInfo.h"
  35 #include "llvm/CodeGen/MachineFunction.h"
  36 #include "llvm/CodeGen/MachineInstrBuilder.h"
  37 #include "llvm/CodeGen/MachineRegisterInfo.h"
  38 #include "llvm/CodeGen/PseudoSourceValue.h"
  39 #include "llvm/CodeGen/SelectionDAG.h"
  40 #include "llvm/MC/MCSectionMachO.h"
  41 #include "llvm/Target/TargetOptions.h"
  42 #include "llvm/ADT/VectorExtras.h"
  43 #include "llvm/Support/CommandLine.h"
  44 #include "llvm/Support/ErrorHandling.h"
  45 #include "llvm/Support/MathExtras.h"
  46 #include "llvm/Support/raw_ostream.h"
  47 #include <sstream>
  48 using namespace llvm;
  49
  50 static cl::opt<bool>
  51 EnableARMLongCalls("arm-long-calls", cl::Hidden,
  52   cl::desc("Generate calls via indirect call instructions."),
  53   cl::init(false));
  54
  55 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  56                                    CCValAssign::LocInfo &LocInfo,
  57                                    ISD::ArgFlagsTy &ArgFlags,
  58                                    CCState &State);
  59 static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  60                                     CCValAssign::LocInfo &LocInfo,
  61                                     ISD::ArgFlagsTy &ArgFlags,
  62                                     CCState &State);
  63 static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  64                                       CCValAssign::LocInfo &LocInfo,
  65                                       ISD::ArgFlagsTy &ArgFlags,
  66                                       CCState &State);
  67 static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  68                                        CCValAssign::LocInfo &LocInfo,
  69                                        ISD::ArgFlagsTy &ArgFlags,
  70                                        CCState &State);
  71
  72 void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
  73                                        EVT PromotedBitwiseVT) {
  74   if (VT != PromotedLdStVT) {
  75     setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
  76     AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
  77                        PromotedLdStVT.getSimpleVT());
  78
  79     setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
  80     AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
  81                        PromotedLdStVT.getSimpleVT());
  82   }
  83
  84   EVT ElemTy = VT.getVectorElementType();
  85   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
  86     setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  87   if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
  88     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  89   if (ElemTy != MVT::i32) {
  90     setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
  91     setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
  92     setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
  93     setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  94   }
  95   setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  96   setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  97   if (llvm::ModelWithRegSequence())
  98     setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  99   else
 100     setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
 101   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
 102   setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
 103   setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
 104   if (VT.isInteger()) {
 105     setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
 106     setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
 107     setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
 108   }
 109
 110   // Promote all bit-wise operations.
 111   if (VT.isInteger() && VT != PromotedBitwiseVT) {
 112     setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
 113     AddPromotedToType (ISD::AND, VT.getSimpleVT(),
 114                        PromotedBitwiseVT.getSimpleVT());
 115     setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
 116     AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
 117                        PromotedBitwiseVT.getSimpleVT());
 118     setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
 119     AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
 120                        PromotedBitwiseVT.getSimpleVT());
 121   }
 122
 123   // Neon does not support vector divide/remainder operations.
 124   setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
 125   setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
 126   setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
 127   setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
 128   setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
 129   setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
 130 }
 131
 132 void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
 133   addRegisterClass(VT, ARM::DPRRegisterClass);
 134   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
 135 }
 136
 137 void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
 138   addRegisterClass(VT, ARM::QPRRegisterClass);
 139   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 140 }
 141
 142 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
 143   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
 144     return new TargetLoweringObjectFileMachO();
 145
 146   return new ARMElfTargetObjectFile();
 147 }
 148
 149 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 150     : TargetLowering(TM, createTLOF(TM)) {
 151   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 152
 153   if (Subtarget->isTargetDarwin()) {
 154     // Uses VFP for Thumb libfuncs if available.
 155     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
 156       // Single-precision floating-point arithmetic.
 157       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
 158       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
 159       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
 160       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
 161
 162       // Double-precision floating-point arithmetic.
 163       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
 164       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
 165       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
 166       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
 167
 168       // Single-precision comparisons.
 169       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
 170       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
 171       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
 172       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
 173       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
 174       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
 175       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
 176       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
 177
 178       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
 179       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
 180       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
 181       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
 182       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
 183       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
 184       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
 185       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
 186
 187       // Double-precision comparisons.
 188       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
 189       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
 190       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
 191       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
 192       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
 193       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
 194       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
 195       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
 196
 197       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
 198       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
 199       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
 200       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
 201       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
 202       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
 203       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
 204       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
 205
 206       // Floating-point to integer conversions.
 207       // i64 conversions are done via library routines even when generating VFP
 208       // instructions, so use the same ones.
 209       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
 210       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
 211       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
 212       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
 213
 214       // Conversions between floating types.
 215       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
 216       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
 217
 218       // Integer to floating-point conversions.
 219       // i64 conversions are done via library routines even when generating VFP
 220       // instructions, so use the same ones.
 221       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
 222       // e.g., __floatunsidf vs. __floatunssidfvfp.
 223       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
 224       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
 225       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
 226       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
 227     }
 228   }
 229
 230   // These libcalls are not available in 32-bit.
 231   setLibcallName(RTLIB::SHL_I128, 0);
 232   setLibcallName(RTLIB::SRL_I128, 0);
 233   setLibcallName(RTLIB::SRA_I128, 0);
 234
 235   // Libcalls should use the AAPCS base standard ABI, even if hard float
 236   // is in effect, as per the ARM RTABI specification, section 4.1.2.
 237   if (Subtarget->isAAPCS_ABI()) {
 238     for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
 239       setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
 240                             CallingConv::ARM_AAPCS);
 241     }
 242   }
 243
 244   if (Subtarget->isThumb1Only())
 245     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
 246   else
 247     addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
 248   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 249     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
 250     addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
 251
 252     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 253   }
 254
 255   if (Subtarget->hasNEON()) {
 256     addDRTypeForNEON(MVT::v2f32);
 257     addDRTypeForNEON(MVT::v8i8);
 258     addDRTypeForNEON(MVT::v4i16);
 259     addDRTypeForNEON(MVT::v2i32);
 260     addDRTypeForNEON(MVT::v1i64);
 261
 262     addQRTypeForNEON(MVT::v4f32);
 263     addQRTypeForNEON(MVT::v2f64);
 264     addQRTypeForNEON(MVT::v16i8);
 265     addQRTypeForNEON(MVT::v8i16);
 266     addQRTypeForNEON(MVT::v4i32);
 267     addQRTypeForNEON(MVT::v2i64);
 268
 269     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
 270     // neither Neon nor VFP support any arithmetic operations on it.
 271     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 272     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 273     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 274     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
 275     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
 276     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
 277     setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
 278     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
 279     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 280     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
 281     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
 282     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
 283     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
 284     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
 285     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
 286     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
 287     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
 288     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
 289     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
 290     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
 291     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
 292     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
 293     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
 294     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
 295
 296     // Neon does not support some operations on v1i64 and v2i64 types.
 297     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
 298     setOperationAction(ISD::MUL, MVT::v2i64, Expand);
 299     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
 300     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
 301
 302     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 303     setTargetDAGCombine(ISD::SHL);
 304     setTargetDAGCombine(ISD::SRL);
 305     setTargetDAGCombine(ISD::SRA);
 306     setTargetDAGCombine(ISD::SIGN_EXTEND);
 307     setTargetDAGCombine(ISD::ZERO_EXTEND);
 308     setTargetDAGCombine(ISD::ANY_EXTEND);
 309     setTargetDAGCombine(ISD::SELECT_CC);
 310   }
 311
 312   computeRegisterProperties();
 313
 314   // ARM does not have f32 extending load.
 315   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
 316
 317   // ARM does not have i1 sign extending load.
 318   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 319
 320   // ARM supports all 4 flavors of integer indexed load / store.
 321   if (!Subtarget->isThumb1Only()) {
 322     for (unsigned im = (unsigned)ISD::PRE_INC;
 323          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
 324       setIndexedLoadAction(im,  MVT::i1,  Legal);
 325       setIndexedLoadAction(im,  MVT::i8,  Legal);
 326       setIndexedLoadAction(im,  MVT::i16, Legal);
 327       setIndexedLoadAction(im,  MVT::i32, Legal);
 328       setIndexedStoreAction(im, MVT::i1,  Legal);
 329       setIndexedStoreAction(im, MVT::i8,  Legal);
 330       setIndexedStoreAction(im, MVT::i16, Legal);
 331       setIndexedStoreAction(im, MVT::i32, Legal);
 332     }
 333   }
 334
 335   // i64 operation support.
 336   if (Subtarget->isThumb1Only()) {
 337     setOperationAction(ISD::MUL,     MVT::i64, Expand);
 338     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
 339     setOperationAction(ISD::MULHS,   MVT::i32, Expand);
 340     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 341     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 342   } else {
 343     setOperationAction(ISD::MUL,     MVT::i64, Expand);
 344     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
 345     if (!Subtarget->hasV6Ops())
 346       setOperationAction(ISD::MULHS, MVT::i32, Expand);
 347   }
 348   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 349   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 350   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 351   setOperationAction(ISD::SRL,       MVT::i64, Custom);
 352   setOperationAction(ISD::SRA,       MVT::i64, Custom);
 353
 354   // ARM does not have ROTL.
 355   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
 356   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
 357   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
 358   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
 359     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
 360
 361   // Only ARMv6 has BSWAP.
 362   if (!Subtarget->hasV6Ops())
 363     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 364
 365   // These are expanded into libcalls.
 366   if (!Subtarget->hasDivide()) {
 367     // v7M has a hardware divider
 368     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
 369     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
 370   }
 371   setOperationAction(ISD::SREM,  MVT::i32, Expand);
 372   setOperationAction(ISD::UREM,  MVT::i32, Expand);
 373   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 374   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 375
 376   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
 377   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
 378   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
 379   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 380   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
 381
 382   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 383
 384   // Use the default implementation.
 385   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
 386   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
 387   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
 388   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
 389   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
 390   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
 391   setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
 392   // FIXME: Shouldn't need this, since no register is used, but the legalizer
 393   // doesn't yet know how to not do that for SjLj.
 394   setExceptionSelectorRegister(ARM::R0);
 395   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 396   setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
 397
 398   // If the subtarget does not have extract instructions, sign_extend_inreg
 399   // needs to be expanded. Extract is available in ARM mode on v6 and up,
 400   // and on most Thumb2 implementations.
 401   if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
 402       || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
 403     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
 404     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
 405   }
 406   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 407
 408   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
 409     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
 410     // iff target supports vfp2.
 411     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
 412
 413   // We want to custom lower some of our intrinsics.
 414   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 415   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 416
 417   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
 418   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
 419   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
 420   setOperationAction(ISD::SELECT,    MVT::i32, Expand);
 421   setOperationAction(ISD::SELECT,    MVT::f32, Expand);
 422   setOperationAction(ISD::SELECT,    MVT::f64, Expand);
 423   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 424   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 425   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 426
 427   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
 428   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
 429   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
 430   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
 431   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
 432
 433   // We don't support sin/cos/fmod/copysign/pow
 434   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
 435   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
 436   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
 437   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
 438   setOperationAction(ISD::FREM,      MVT::f64, Expand);
 439   setOperationAction(ISD::FREM,      MVT::f32, Expand);
 440   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 441     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
 442     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
 443   }
 444   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
 445   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 446
 447   // Various VFP goodness
 448   if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
 449     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
 450     if (Subtarget->hasVFP2()) {
 451       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 452       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 453       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 454       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 455     }
 456     // Special handling for half-precision FP.
 457     if (!Subtarget->hasFP16()) {
 458       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
 459       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
 460     }
 461   }
 462
 463   // We have target-specific dag combine patterns for the following nodes:
 464   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
 465   setTargetDAGCombine(ISD::ADD);
 466   setTargetDAGCombine(ISD::SUB);
 467   setTargetDAGCombine(ISD::MUL);
 468
 469   setStackPointerRegisterToSaveRestore(ARM::SP);
 470
 471   if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
 472     setSchedulingPreference(Sched::RegPressure);
 473   else
 474     setSchedulingPreference(Sched::Hybrid);
 475
 476   // FIXME: If-converter should use instruction latency to determine
 477   // profitability rather than relying on fixed limits.
 478   if (Subtarget->getCPUString() == "generic") {
 479     // Generic (and overly aggressive) if-conversion limits.
 480     setIfCvtBlockSizeLimit(10);
 481     setIfCvtDupBlockSizeLimit(2);
 482   } else if (Subtarget->hasV7Ops()) {
 483     setIfCvtBlockSizeLimit(3);
 484     setIfCvtDupBlockSizeLimit(1);
 485   } else if (Subtarget->hasV6Ops()) {
 486     setIfCvtBlockSizeLimit(2);
 487     setIfCvtDupBlockSizeLimit(1);
 488   } else {
 489     setIfCvtBlockSizeLimit(3);
 490     setIfCvtDupBlockSizeLimit(2);
 491   }
 492
 493   maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
 494   // Do not enable CodePlacementOpt for now: it currently runs after the
 495   // ARMConstantIslandPass and messes up branch relaxation and placement
 496   // of constant islands.
 497   // benefitFromCodePlacementOpt = true;
 498 }
 499
 500 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 501   switch (Opcode) {
 502   default: return 0;
 503   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
 504   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
 505   case ARMISD::CALL:          return "ARMISD::CALL";
 506   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
 507   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
 508   case ARMISD::tCALL:         return "ARMISD::tCALL";
 509   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
 510   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
 511   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
 512   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
 513   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
 514   case ARMISD::CMP:           return "ARMISD::CMP";
 515   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
 516   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
 517   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
 518   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
 519   case ARMISD::CMOV:          return "ARMISD::CMOV";
 520   case ARMISD::CNEG:          return "ARMISD::CNEG";
 521
 522   case ARMISD::RBIT:          return "ARMISD::RBIT";
 523
 524   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
 525   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
 526   case ARMISD::SITOF:         return "ARMISD::SITOF";
 527   case ARMISD::UITOF:         return "ARMISD::UITOF";
 528
 529   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
 530   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
 531   case ARMISD::RRX:           return "ARMISD::RRX";
 532
 533   case ARMISD::VMOVRRD:         return "ARMISD::VMOVRRD";
 534   case ARMISD::VMOVDRR:         return "ARMISD::VMOVDRR";
 535
 536   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
 537   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
 538
 539   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
 540
 541   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
 542
 543   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
 544   case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
 545
 546   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
 547   case ARMISD::VCGE:          return "ARMISD::VCGE";
 548   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
 549   case ARMISD::VCGT:          return "ARMISD::VCGT";
 550   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
 551   case ARMISD::VTST:          return "ARMISD::VTST";
 552
 553   case ARMISD::VSHL:          return "ARMISD::VSHL";
 554   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
 555   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
 556   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
 557   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
 558   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
 559   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
 560   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
 561   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
 562   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
 563   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
 564   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
 565   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
 566   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
 567   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
 568   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
 569   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
 570   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
 571   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
 572   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
 573   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
 574   case ARMISD::VDUP:          return "ARMISD::VDUP";
 575   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
 576   case ARMISD::VEXT:          return "ARMISD::VEXT";
 577   case ARMISD::VREV64:        return "ARMISD::VREV64";
 578   case ARMISD::VREV32:        return "ARMISD::VREV32";
 579   case ARMISD::VREV16:        return "ARMISD::VREV16";
 580   case ARMISD::VZIP:          return "ARMISD::VZIP";
 581   case ARMISD::VUZP:          return "ARMISD::VUZP";
 582   case ARMISD::VTRN:          return "ARMISD::VTRN";
 583   case ARMISD::FMAX:          return "ARMISD::FMAX";
 584   case ARMISD::FMIN:          return "ARMISD::FMIN";
 585   }
 586 }
 587
 588 /// getRegClassFor - Return the register class that should be used for the
 589 /// specified value type.
 590 TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
 591   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
 592   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
 593   // load / store 4 to 8 consecutive D registers.
 594   if (Subtarget->hasNEON()) {
 595     if (VT == MVT::v4i64)
 596       return ARM::QQPRRegisterClass;
 597     else if (VT == MVT::v8i64)
 598       return ARM::QQQQPRRegisterClass;
 599   }
 600   return TargetLowering::getRegClassFor(VT);
 601 }
 602
 603 /// getFunctionAlignment - Return the Log2 alignment of this function.
 604 unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
 605   return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
 606 }
 607
 608 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
 609   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
 610     EVT VT = N->getValueType(i);
 611     if (VT.isFloatingPoint() || VT.isVector())
 612       return Sched::Latency;
 613   }
 614   return Sched::RegPressure;
 615 }
 616
 617 //===----------------------------------------------------------------------===//
 618 // Lowering Code
 619 //===----------------------------------------------------------------------===//
 620
 621 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
 622 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
 623   switch (CC) {
 624   default: llvm_unreachable("Unknown condition code!");
 625   case ISD::SETNE:  return ARMCC::NE;
 626   case ISD::SETEQ:  return ARMCC::EQ;
 627   case ISD::SETGT:  return ARMCC::GT;
 628   case ISD::SETGE:  return ARMCC::GE;
 629   case ISD::SETLT:  return ARMCC::LT;
 630   case ISD::SETLE:  return ARMCC::LE;
 631   case ISD::SETUGT: return ARMCC::HI;
 632   case ISD::SETUGE: return ARMCC::HS;
 633   case ISD::SETULT: return ARMCC::LO;
 634   case ISD::SETULE: return ARMCC::LS;
 635   }
 636 }
 637
 638 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
 639 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 640                         ARMCC::CondCodes &CondCode2) {
 641   CondCode2 = ARMCC::AL;
 642   switch (CC) {
 643   default: llvm_unreachable("Unknown FP condition!");
 644   case ISD::SETEQ:
 645   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
 646   case ISD::SETGT:
 647   case ISD::SETOGT: CondCode = ARMCC::GT; break;
 648   case ISD::SETGE:
 649   case ISD::SETOGE: CondCode = ARMCC::GE; break;
 650   case ISD::SETOLT: CondCode = ARMCC::MI; break;
 651   case ISD::SETOLE: CondCode = ARMCC::LS; break;
 652   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
 653   case ISD::SETO:   CondCode = ARMCC::VC; break;
 654   case ISD::SETUO:  CondCode = ARMCC::VS; break;
 655   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
 656   case ISD::SETUGT: CondCode = ARMCC::HI; break;
 657   case ISD::SETUGE: CondCode = ARMCC::PL; break;
 658   case ISD::SETLT:
 659   case ISD::SETULT: CondCode = ARMCC::LT; break;
 660   case ISD::SETLE:
 661   case ISD::SETULE: CondCode = ARMCC::LE; break;
 662   case ISD::SETNE:
 663   case ISD::SETUNE: CondCode = ARMCC::NE; break;
 664   }
 665 }
 666
 667 //===----------------------------------------------------------------------===//
 668 //                      Calling Convention Implementation
 669 //===----------------------------------------------------------------------===//
 670
 671 #include "ARMGenCallingConv.inc"
 672
 673 // APCS f64 is in register pairs, possibly split to stack
 674 static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 675                           CCValAssign::LocInfo &LocInfo,
 676                           CCState &State, bool CanFail) {
 677   static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
 678
 679   // Try to get the first register.
 680   if (unsigned Reg = State.AllocateReg(RegList, 4))
 681     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 682   else {
 683     // For the 2nd half of a v2f64, do not fail.
 684     if (CanFail)
 685       return false;
 686
 687     // Put the whole thing on the stack.
 688     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 689                                            State.AllocateStack(8, 4),
 690                                            LocVT, LocInfo));
 691     return true;
 692   }
 693
 694   // Try to get the second register.
 695   if (unsigned Reg = State.AllocateReg(RegList, 4))
 696     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 697   else
 698     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 699                                            State.AllocateStack(4, 4),
 700                                            LocVT, LocInfo));
 701   return true;
 702 }
 703
 704 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 705                                    CCValAssign::LocInfo &LocInfo,
 706                                    ISD::ArgFlagsTy &ArgFlags,
 707                                    CCState &State) {
 708   if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
 709     return false;
 710   if (LocVT == MVT::v2f64 &&
 711       !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
 712     return false;
 713   return true;  // we handled it
 714 }
 715
 716 // AAPCS f64 is in aligned register pairs
 717 static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 718                            CCValAssign::LocInfo &LocInfo,
 719                            CCState &State, bool CanFail) {
 720   static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
 721   static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
 722
 723   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
 724   if (Reg == 0) {
 725     // For the 2nd half of a v2f64, do not just fail.
 726     if (CanFail)
 727       return false;
 728
 729     // Put the whole thing on the stack.
 730     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 731                                            State.AllocateStack(8, 8),
 732                                            LocVT, LocInfo));
 733     return true;
 734   }
 735
 736   unsigned i;
 737   for (i = 0; i < 2; ++i)
 738     if (HiRegList[i] == Reg)
 739       break;
 740
 741   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 742   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
 743                                          LocVT, LocInfo));
 744   return true;
 745 }
 746
 747 static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 748                                     CCValAssign::LocInfo &LocInfo,
 749                                     ISD::ArgFlagsTy &ArgFlags,
 750                                     CCState &State) {
 751   if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
 752     return false;
 753   if (LocVT == MVT::v2f64 &&
 754       !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
 755     return false;
 756   return true;  // we handled it
 757 }
 758
 759 static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 760                          CCValAssign::LocInfo &LocInfo, CCState &State) {
 761   static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
 762   static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
 763
 764   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
 765   if (Reg == 0)
 766     return false; // we didn't handle it
 767
 768   unsigned i;
 769   for (i = 0; i < 2; ++i)
 770     if (HiRegList[i] == Reg)
 771       break;
 772
 773   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 774   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
 775                                          LocVT, LocInfo));
 776   return true;
 777 }
 778
 779 static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 780                                       CCValAssign::LocInfo &LocInfo,
 781                                       ISD::ArgFlagsTy &ArgFlags,
 782                                       CCState &State) {
 783   if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
 784     return false;
 785   if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
 786     return false;
 787   return true;  // we handled it
 788 }
 789
 790 static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 791                                        CCValAssign::LocInfo &LocInfo,
 792                                        ISD::ArgFlagsTy &ArgFlags,
 793                                        CCState &State) {
 794   return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
 795                                    State);
 796 }
 797
 798 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 799 /// given CallingConvention value.
 800 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
 801                                                  bool Return,
 802                                                  bool isVarArg) const {
 803   switch (CC) {
 804   default:
 805     llvm_unreachable("Unsupported calling convention");
 806   case CallingConv::C:
 807   case CallingConv::Fast:
 808     // Use target triple & subtarget features to do actual dispatch.
 809     if (Subtarget->isAAPCS_ABI()) {
 810       if (Subtarget->hasVFP2() &&
 811           FloatABIType == FloatABI::Hard && !isVarArg)
 812         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
 813       else
 814         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
 815     } else
 816         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
 817   case CallingConv::ARM_AAPCS_VFP:
 818     return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
 819   case CallingConv::ARM_AAPCS:
 820     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
 821   case CallingConv::ARM_APCS:
 822     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
 823   }
 824 }
 825
 826 /// LowerCallResult - Lower the result values of a call into the
 827 /// appropriate copies out of appropriate physical registers.
 828 SDValue
 829 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 830                                    CallingConv::ID CallConv, bool isVarArg,
 831                                    const SmallVectorImpl<ISD::InputArg> &Ins,
 832                                    DebugLoc dl, SelectionDAG &DAG,
 833                                    SmallVectorImpl<SDValue> &InVals) const {
 834
 835   // Assign locations to each value returned by this call.
 836   SmallVector<CCValAssign, 16> RVLocs;
 837   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
 838                  RVLocs, *DAG.getContext());
 839   CCInfo.AnalyzeCallResult(Ins,
 840                            CCAssignFnForNode(CallConv, /* Return*/ true,
 841                                              isVarArg));
 842
 843   // Copy all of the result registers out of their specified physreg.
 844   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 845     CCValAssign VA = RVLocs[i];
 846
 847     SDValue Val;
 848     if (VA.needsCustom()) {
 849       // Handle f64 or half of a v2f64.
 850       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
 851                                       InFlag);
 852       Chain = Lo.getValue(1);
 853       InFlag = Lo.getValue(2);
 854       VA = RVLocs[++i]; // skip ahead to next loc
 855       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
 856                                       InFlag);
 857       Chain = Hi.getValue(1);
 858       InFlag = Hi.getValue(2);
 859       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
 860
 861       if (VA.getLocVT() == MVT::v2f64) {
 862         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
 863         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
 864                           DAG.getConstant(0, MVT::i32));
 865
 866         VA = RVLocs[++i]; // skip ahead to next loc
 867         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
 868         Chain = Lo.getValue(1);
 869         InFlag = Lo.getValue(2);
 870         VA = RVLocs[++i]; // skip ahead to next loc
 871         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
 872         Chain = Hi.getValue(1);
 873         InFlag = Hi.getValue(2);
 874         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
 875         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
 876                           DAG.getConstant(1, MVT::i32));
 877       }
 878     } else {
 879       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
 880                                InFlag);
 881       Chain = Val.getValue(1);
 882       InFlag = Val.getValue(2);
 883     }
 884
 885     switch (VA.getLocInfo()) {
 886     default: llvm_unreachable("Unknown loc info!");
 887     case CCValAssign::Full: break;
 888     case CCValAssign::BCvt:
 889       Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
 890       break;
 891     }
 892
 893     InVals.push_back(Val);
 894   }
 895
 896   return Chain;
 897 }
 898
 899 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 900 /// by "Src" to address "Dst" of size "Size".  Alignment information is
 901 /// specified by the specific parameter attribute.  The copy will be passed as
 902 /// a byval function parameter.
 903 /// Sometimes what we are copying is the end of a larger object, the part that
 904 /// does not fit in registers.
 905 static SDValue
 906 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
 907                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
 908                           DebugLoc dl) {
 909   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
 910   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
 911                        /*isVolatile=*/false, /*AlwaysInline=*/false,
 912                        NULL, 0, NULL, 0);
 913 }
 914
 915 /// LowerMemOpCallTo - Store the argument to the stack.
 916 SDValue
 917 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
 918                                     SDValue StackPtr, SDValue Arg,
 919                                     DebugLoc dl, SelectionDAG &DAG,
 920                                     const CCValAssign &VA,
 921                                     ISD::ArgFlagsTy Flags) const {
 922   unsigned LocMemOffset = VA.getLocMemOffset();
 923   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
 924   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
 925   if (Flags.isByVal()) {
 926     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
 927   }
 928   return DAG.getStore(Chain, dl, Arg, PtrOff,
 929                       PseudoSourceValue::getStack(), LocMemOffset,
 930                       false, false, 0);
 931 }
 932
 933 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
 934                                          SDValue Chain, SDValue &Arg,
 935                                          RegsToPassVector &RegsToPass,
 936                                          CCValAssign &VA, CCValAssign &NextVA,
 937                                          SDValue &StackPtr,
 938                                          SmallVector<SDValue, 8> &MemOpChains,
 939                                          ISD::ArgFlagsTy Flags) const {
 940
 941   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
 942                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
 943   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
 944
 945   if (NextVA.isRegLoc())
 946     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
 947   else {
 948     assert(NextVA.isMemLoc());
 949     if (StackPtr.getNode() == 0)
 950       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 951
 952     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
 953                                            dl, DAG, NextVA,
 954                                            Flags));
 955   }
 956 }
 957
 958 /// LowerCall - Lowering a call into a callseq_start <-
 959 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
 960 /// nodes.
     ///
     /// Outs supplies the outgoing argument values and their flags; Ins describes
     /// the values the call returns.  Arguments assigned to registers are copied
     /// in through a flag-linked sequence of CopyToReg nodes, and arguments
     /// assigned to memory are stored relative to SP.  isTailCall is always
     /// cleared.  The call's results are appended to InVals by LowerCallResult,
     /// whose chain is the return value.
 961 SDValue
 962 ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 963                              CallingConv::ID CallConv, bool isVarArg,
 964                              bool &isTailCall,
 965                              const SmallVectorImpl<ISD::OutputArg> &Outs,
 966                              const SmallVectorImpl<ISD::InputArg> &Ins,
 967                              DebugLoc dl, SelectionDAG &DAG,
 968                              SmallVectorImpl<SDValue> &InVals) const {
 969   // ARM target does not yet support tail call optimization.
 970   isTailCall = false;
 971
 972   // Analyze operands of the call, assigning locations to each operand.
 973   SmallVector<CCValAssign, 16> ArgLocs;
 974   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
 975                  *DAG.getContext());
 976   CCInfo.AnalyzeCallOperands(Outs,
 977                              CCAssignFnForNode(CallConv, /* Return*/ false,
 978                                                isVarArg));
 979
 980   // Get a count of how many bytes are to be pushed on the stack.
 981   unsigned NumBytes = CCInfo.getNextStackOffset();
 982
 983   // Adjust the stack pointer for the new arguments...
 984   // These operations are automatically eliminated by the prolog/epilog pass
 985   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
 986
 987   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 988
       // Register arguments are collected in RegsToPass and stack stores in
       // MemOpChains; both are emitted after the loop.
 989   RegsToPassVector RegsToPass;
 990   SmallVector<SDValue, 8> MemOpChains;
 991
 992   // Walk the register/memloc assignments, inserting copies/loads.  In the case
 993   // of tail call optimization, arguments are handled later.
       // i walks ArgLocs and is advanced additionally inside the body when one
       // argument occupies several locations; realArgIdx walks Outs and advances
       // once per iteration.
 994   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
 995        i != e;
 996        ++i, ++realArgIdx) {
 997     CCValAssign &VA = ArgLocs[i];
 998     SDValue Arg = Outs[realArgIdx].Val;
 999     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1000
1001     // Promote the value if needed.
1002     switch (VA.getLocInfo()) {
1003     default: llvm_unreachable("Unknown loc info!");
1004     case CCValAssign::Full: break;
1005     case CCValAssign::SExt:
1006       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1007       break;
1008     case CCValAssign::ZExt:
1009       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1010       break;
1011     case CCValAssign::AExt:
1012       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1013       break;
1014     case CCValAssign::BCvt:
1015       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1016       break;
1017     }
1018
1019     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1020     if (VA.needsCustom()) {
1021       if (VA.getLocVT() == MVT::v2f64) {
               // Split the vector into its two f64 elements and pass each one
               // separately.
1022         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1023                                   DAG.getConstant(0, MVT::i32));
1024         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1025                                   DAG.getConstant(1, MVT::i32));
1026
1027         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1028                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1029
             // NOTE: VA is a reference bound to the ArgLocs entry selected at the
             // top of this iteration, so this assignment copies the next
             // location into that entry rather than rebinding VA.
1030         VA = ArgLocs[++i]; // skip ahead to next loc
1031         if (VA.isRegLoc()) {
1032           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1033                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1034         } else {
               // The second f64 has a single memory location and is stored
               // whole.
1035           assert(VA.isMemLoc());
1036
1037           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1038                                                  dl, DAG, VA, Flags));
1039         }
1040       } else {
1041         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1042                          StackPtr, MemOpChains, Flags);
1043       }
1044     } else if (VA.isRegLoc()) {
1045       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1046     } else {
1047       assert(VA.isMemLoc());
1048
1049       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1050                                              dl, DAG, VA, Flags));
1051     }
1052   }
1053
       // Join all argument stores into one chain.
1054   if (!MemOpChains.empty())
1055     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1056                         &MemOpChains[0], MemOpChains.size());
1057
1058   // Build a sequence of copy-to-reg nodes chained together with token chain
1059   // and flag operands which copy the outgoing args into the appropriate regs.
1060   SDValue InFlag;
1061   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1062     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1063                              RegsToPass[i].second, InFlag);
1064     InFlag = Chain.getValue(1);
1065   }
1066
1067   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1068   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1069   // node so that legalize doesn't hack it.
       // isDirect, isARMFunc and isLocalARMFunc are only set on the
       // non-long-call paths below; they drive the choice of call opcode.
1070   bool isDirect = false;
1071   bool isARMFunc = false;
1072   bool isLocalARMFunc = false;
1073   MachineFunction &MF = DAG.getMachineFunction();
1074   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1075
1076   if (EnableARMLongCalls) {
1077     assert (getTargetMachine().getRelocationModel() == Reloc::Static
1078             && "long-calls with non-static relocation model!");
1079     // Handle a global address or an external symbol. If it's not one of
1080     // those, the target's already in a register, so we don't need to do
1081     // anything extra.
1082     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1083       const GlobalValue *GV = G->getGlobal();
1084       // Create a constant pool entry for the callee address
1085       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1086       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1087                                                            ARMPCLabelIndex,
1088                                                            ARMCP::CPValue, 0);
1089       // Get the address of the callee into a register
1090       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1091       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1092       Callee = DAG.getLoad(getPointerTy(), dl,
1093                            DAG.getEntryNode(), CPAddr,
1094                            PseudoSourceValue::getConstantPool(), 0,
1095                            false, false, 0);
1096     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1097       const char *Sym = S->getSymbol();
1098
1099       // Create a constant pool entry for the callee address
1100       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1101       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1102                                                        Sym, ARMPCLabelIndex, 0);
1103       // Get the address of the callee into a register
1104       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1105       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1106       Callee = DAG.getLoad(getPointerTy(), dl,
1107                            DAG.getEntryNode(), CPAddr,
1108                            PseudoSourceValue::getConstantPool(), 0,
1109                            false, false, 0);
1110     }
1111   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1112     const GlobalValue *GV = G->getGlobal();
1113     isDirect = true;
1114     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1115     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1116                    getTargetMachine().getRelocationModel() != Reloc::Static;
1117     isARMFunc = !Subtarget->isThumb() || isStub;
1118     // ARM call to a local ARM function is predicable.
1119     isLocalARMFunc = !Subtarget->isThumb() && !isExt;
1120     // tBX takes a register source operand.
1121     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
           // Load the callee address from a constant pool entry and apply
           // PIC_ADD with the entry's label index.
1122       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1123       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1124                                                            ARMPCLabelIndex,
1125                                                            ARMCP::CPValue, 4);
1126       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1127       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1128       Callee = DAG.getLoad(getPointerTy(), dl,
1129                            DAG.getEntryNode(), CPAddr,
1130                            PseudoSourceValue::getConstantPool(), 0,
1131                            false, false, 0);
1132       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1133       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1134                            getPointerTy(), Callee, PICLabel);
1135     } else
1136       Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
1137   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1138     isDirect = true;
1139     bool isStub = Subtarget->isTargetDarwin() &&
1140                   getTargetMachine().getRelocationModel() != Reloc::Static;
1141     isARMFunc = !Subtarget->isThumb() || isStub;
1142     // tBX takes a register source operand.
1143     const char *Sym = S->getSymbol();
1144     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
           // Same constant pool load plus PIC_ADD sequence as for a global
           // address callee.
1145       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1146       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1147                                                        Sym, ARMPCLabelIndex, 4);
1148       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1149       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1150       Callee = DAG.getLoad(getPointerTy(), dl,
1151                            DAG.getEntryNode(), CPAddr,
1152                            PseudoSourceValue::getConstantPool(), 0,
1153                            false, false, 0);
1154       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1155       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1156                            getPointerTy(), Callee, PICLabel);
1157     } else
1158       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
1159   }
1160
1161   // FIXME: handle tail calls differently.
       // Pick the call node.  CALL_NOLINK is used when the subtarget lacks v5T
       // ops and the call is indirect (or, in Thumb mode, the callee was flagged
       // isARMFunc above).
1162   unsigned CallOpc;
1163   if (Subtarget->isThumb()) {
1164     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1165       CallOpc = ARMISD::CALL_NOLINK;
1166     else
1167       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1168   } else {
1169     CallOpc = (isDirect || Subtarget->hasV5TOps())
1170       ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1171       : ARMISD::CALL_NOLINK;
1172   }
1173   if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
1174     // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
1175     Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
1176     InFlag = Chain.getValue(1);
1177   }
1178
       // Operand list of the call node: chain, callee, the argument registers,
       // then the flag if there is one.
1179   std::vector<SDValue> Ops;
1180   Ops.push_back(Chain);
1181   Ops.push_back(Callee);
1182
1183   // Add argument registers to the end of the list so that they are known live
1184   // into the call.
1185   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1186     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1187                                   RegsToPass[i].second.getValueType()));
1188
1189   if (InFlag.getNode())
1190     Ops.push_back(InFlag);
1191   // Returns a chain and a flag for retval copy to use.
1192   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1193                       &Ops[0], Ops.size());
1194   InFlag = Chain.getValue(1);
1195
1196   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1197                              DAG.getIntPtrConstant(0, true), InFlag);
       // The flag result of callseq_end is only picked up when there are return
       // values for LowerCallResult to copy out.
1198   if (!Ins.empty())
1199     InFlag = Chain.getValue(1);
1200
1201   // Handle result values, copying them out of physregs into vregs that we
1202   // return.
1203   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1204                          dl, DAG, InVals);
1205 }
1206
1207 SDValue
1208 ARMTargetLowering::LowerReturn(SDValue Chain,
1209                                CallingConv::ID CallConv, bool isVarArg,
1210                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1211                                DebugLoc dl, SelectionDAG &DAG) const {
       // Lower a return.  Each value in Outs is copied into the physical
       // register(s) chosen by the return calling convention, with the copies
       // linked through a flag, and an ARMISD::RET_FLAG node is returned.  f64
       // values with a custom location are split into two i32 registers with
       // VMOVRRD; v2f64 values are split into two such f64 halves first.
1212
1213   // CCValAssign - represent the assignment of the return value to a location.
1214   SmallVector<CCValAssign, 16> RVLocs;
1215
1216   // CCState - Info about the registers and stack slots.
1217   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1218                  *DAG.getContext());
1219
1220   // Analyze outgoing return values.
1221   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1222                                                isVarArg));
1223
1224   // If this is the first return lowered for this function, add
1225   // the regs to the liveout set for the function.
1226   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1227     for (unsigned i = 0; i != RVLocs.size(); ++i)
1228       if (RVLocs[i].isRegLoc())
1229         DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1230   }
1231
       // Flag links each CopyToReg to the next; it stays empty when nothing is
       // returned.
1232   SDValue Flag;
1233
1234   // Copy the result values into the output registers.
       // i walks RVLocs and is advanced additionally inside the body when one
       // value occupies several locations; realRVLocIdx walks Outs and advances
       // once per iteration.
1235   for (unsigned i = 0, realRVLocIdx = 0;
1236        i != RVLocs.size();
1237        ++i, ++realRVLocIdx) {
1238     CCValAssign &VA = RVLocs[i];
1239     assert(VA.isRegLoc() && "Can only return in registers!");
1240
1241     SDValue Arg = Outs[realRVLocIdx].Val;
1242
1243     switch (VA.getLocInfo()) {
1244     default: llvm_unreachable("Unknown loc info!");
1245     case CCValAssign::Full: break;
1246     case CCValAssign::BCvt:
1247       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1248       break;
1249     }
1250
         // NOTE: VA is a reference bound to the RVLocs entry selected at the top
         // of this iteration.  Each "VA = RVLocs[++i]" below therefore copies the
         // next location into that entry rather than rebinding VA; the
         // following getLocReg() calls read the copied location.
1251     if (VA.needsCustom()) {
1252       if (VA.getLocVT() == MVT::v2f64) {
1253         // Extract the first half and return it in two registers.
1254         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1255                                    DAG.getConstant(0, MVT::i32));
1256         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1257                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
1258
1259         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1260         Flag = Chain.getValue(1);
1261         VA = RVLocs[++i]; // skip ahead to next loc
1262         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1263                                  HalfGPRs.getValue(1), Flag);
1264         Flag = Chain.getValue(1);
1265         VA = RVLocs[++i]; // skip ahead to next loc
1266
1267         // Extract the 2nd half and fall through to handle it as an f64 value.
1268         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1269                           DAG.getConstant(1, MVT::i32));
1270       }
1271       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1272       // available.
           // Arg is passed here as a one-element operand array (&Arg, 1).
1273       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1274                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1275       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1276       Flag = Chain.getValue(1);
1277       VA = RVLocs[++i]; // skip ahead to next loc
1278       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1279                                Flag);
1280     } else
1281       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1282
1283     // Guarantee that all emitted copies are
1284     // stuck together, avoiding something bad.
1285     Flag = Chain.getValue(1);
1286   }
1287
       // Attach the flag of the last copy to the return node when there is one.
1288   SDValue result;
1289   if (Flag.getNode())
1290     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1291   else // Return Void
1292     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1293
1294   return result;
1295 }
1296
1297 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1298 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1299 // one of the above mentioned nodes. It has to be wrapped because otherwise
1300 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1301 // be used to form addressing mode. These wrapped nodes will be selected
1302 // into MOVi.
1303 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1304   EVT PtrVT = Op.getValueType();
1305   // FIXME there is no actual debug info here
1306   DebugLoc dl = Op.getDebugLoc();
1307   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1308   SDValue Res;
1309   if (CP->isMachineConstantPoolEntry())
1310     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1311                                     CP->getAlignment());
1312   else
1313     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1314                                     CP->getAlignment());
1315   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1316 }
1317
1318 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1319                                              SelectionDAG &DAG) const {
1320   MachineFunction &MF = DAG.getMachineFunction();
1321   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1322   unsigned ARMPCLabelIndex = 0;
1323   DebugLoc DL = Op.getDebugLoc();
1324   EVT PtrVT = getPointerTy();
1325   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1326   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1327   SDValue CPAddr;
1328   if (RelocM == Reloc::Static) {
1329     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1330   } else {
1331     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1332     ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1333     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1334                                                          ARMCP::CPBlockAddress,
1335                                                          PCAdj);
1336     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1337   }
1338   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1339   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1340                                PseudoSourceValue::getConstantPool(), 0,
1341                                false, false, 0);
1342   if (RelocM == Reloc::Static)
1343     return Result;
1344   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1345   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1346 }
1347
1348 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
1349 SDValue
1350 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1351                                                  SelectionDAG &DAG) const {
1352   DebugLoc dl = GA->getDebugLoc();
1353   EVT PtrVT = getPointerTy();
1354   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1355   MachineFunction &MF = DAG.getMachineFunction();
1356   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1357   unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1358   ARMConstantPoolValue *CPV =
1359     new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1360                              ARMCP::CPValue, PCAdj, "tlsgd", true);
1361   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1362   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1363   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1364                          PseudoSourceValue::getConstantPool(), 0,
1365                          false, false, 0);
1366   SDValue Chain = Argument.getValue(1);
1367
1368   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1369   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1370
1371   // call __tls_get_addr.
1372   ArgListTy Args;
1373   ArgListEntry Entry;
1374   Entry.Node = Argument;
1375   Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1376   Args.push_back(Entry);
1377   // FIXME: is there useful debug info available here?
1378   std::pair<SDValue, SDValue> CallResult =
1379     LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1380                 false, false, false, false,
1381                 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1382                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1383   return CallResult.first;
1384 }
1385
1386 // Lower ISD::GlobalTLSAddress using the "initial exec" or
1387 // "local exec" model.
1388 SDValue
1389 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1390                                         SelectionDAG &DAG) const {
1391   const GlobalValue *GV = GA->getGlobal();
1392   DebugLoc dl = GA->getDebugLoc();
1393   SDValue Offset;
1394   SDValue Chain = DAG.getEntryNode();
1395   EVT PtrVT = getPointerTy();
1396   // Get the Thread Pointer
1397   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1398
1399   if (GV->isDeclaration()) {
1400     MachineFunction &MF = DAG.getMachineFunction();
1401     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1402     unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1403     // Initial exec model.
1404     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1405     ARMConstantPoolValue *CPV =
1406       new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1407                                ARMCP::CPValue, PCAdj, "gottpoff", true);
1408     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1409     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1410     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1411                          PseudoSourceValue::getConstantPool(), 0,
1412                          false, false, 0);
1413     Chain = Offset.getValue(1);
1414
1415     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1416     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1417
1418     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1419                          PseudoSourceValue::getConstantPool(), 0,
1420                          false, false, 0);
1421   } else {
1422     // local exec model
1423     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
1424     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1425     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1426     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1427                          PseudoSourceValue::getConstantPool(), 0,
1428                          false, false, 0);
1429   }
1430
1431   // The address of the thread local variable is the add of the thread
1432   // pointer with the offset of the variable.
1433   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1434 }
1435
1436 SDValue
1437 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1438   // TODO: implement the "local dynamic" model
1439   assert(Subtarget->isTargetELF() &&
1440          "TLS not implemented for non-ELF targets");
1441   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1442   // If the relocation model is PIC, use the "General Dynamic" TLS Model,
1443   // otherwise use the "Local Exec" TLS Model
1444   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1445     return LowerToTLSGeneralDynamicModel(GA, DAG);
1446   else
1447     return LowerToTLSExecModels(GA, DAG);
1448 }
1449
1450 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1451                                                  SelectionDAG &DAG) const {
1452   EVT PtrVT = getPointerTy();
1453   DebugLoc dl = Op.getDebugLoc();
1454   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1455   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1456   if (RelocM == Reloc::PIC_) {
1457     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1458     ARMConstantPoolValue *CPV =
1459       new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
1460     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1461     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1462     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1463                                  CPAddr,
1464                                  PseudoSourceValue::getConstantPool(), 0,
1465                                  false, false, 0);
1466     SDValue Chain = Result.getValue(1);
1467     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1468     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1469     if (!UseGOTOFF)
1470       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1471                            PseudoSourceValue::getGOT(), 0,
1472                            false, false, 0);
1473     return Result;
1474   } else {
1475     // If we have T2 ops, we can materialize the address directly via movt/movw
1476     // pair. This is always cheaper.
1477     if (Subtarget->useMovt()) {
1478       return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1479                          DAG.getTargetGlobalAddress(GV, PtrVT));
1480     } else {
1481       SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1482       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1483       return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1484                          PseudoSourceValue::getConstantPool(), 0,
1485                          false, false, 0);
1486     }
1487   }
1488 }
1489
1490 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1491                                                     SelectionDAG &DAG) const {
1492   MachineFunction &MF = DAG.getMachineFunction();
1493   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1494   unsigned ARMPCLabelIndex = 0;
1495   EVT PtrVT = getPointerTy();
1496   DebugLoc dl = Op.getDebugLoc();
1497   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1498   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1499   SDValue CPAddr;
1500   if (RelocM == Reloc::Static)
1501     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1502   else {
1503     ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1504     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1505     ARMConstantPoolValue *CPV =
1506       new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1507     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1508   }
1509   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1510
1511   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1512                                PseudoSourceValue::getConstantPool(), 0,
1513                                false, false, 0);
1514   SDValue Chain = Result.getValue(1);
1515
1516   if (RelocM == Reloc::PIC_) {
1517     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1518     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1519   }
1520
1521   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1522     Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1523                          PseudoSourceValue::getGOT(), 0,
1524                          false, false, 0);
1525
1526   return Result;
1527 }
1528
1529 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1530                                                     SelectionDAG &DAG) const {
1531   assert(Subtarget->isTargetELF() &&
1532          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1533   MachineFunction &MF = DAG.getMachineFunction();
1534   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1535   unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1536   EVT PtrVT = getPointerTy();
1537   DebugLoc dl = Op.getDebugLoc();
1538   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1539   ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1540                                                        "_GLOBAL_OFFSET_TABLE_",
1541                                                        ARMPCLabelIndex, PCAdj);
1542   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1543   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1544   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1545                                PseudoSourceValue::getConstantPool(), 0,
1546                                false, false, 0);
1547   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1548   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1549 }
1550
1551 SDValue
1552 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1553   DebugLoc dl = Op.getDebugLoc();
1554   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1555                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
1556 }
1557
1558 SDValue
1559 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1560                                            const ARMSubtarget *Subtarget)
1561                                              const {
1562   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1563   DebugLoc dl = Op.getDebugLoc();
1564   switch (IntNo) {
1565   default: return SDValue();    // Don't custom lower most intrinsics.
1566   case Intrinsic::arm_thread_pointer: {
1567     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1568     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1569   }
1570   case Intrinsic::eh_sjlj_lsda: {
1571     MachineFunction &MF = DAG.getMachineFunction();
1572     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1573     unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1574     EVT PtrVT = getPointerTy();
1575     DebugLoc dl = Op.getDebugLoc();
1576     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1577     SDValue CPAddr;
1578     unsigned PCAdj = (RelocM != Reloc::PIC_)
1579       ? 0 : (Subtarget->isThumb() ? 4 : 8);
1580     ARMConstantPoolValue *CPV =
1581       new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
1582                                ARMCP::CPLSDA, PCAdj);
1583     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1584     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1585     SDValue Result =
1586       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1587                   PseudoSourceValue::getConstantPool(), 0,
1588                   false, false, 0);
1589     SDValue Chain = Result.getValue(1);
1590
1591     if (RelocM == Reloc::PIC_) {
1592       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1593       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1594     }
1595     return Result;
1596   }
1597   case Intrinsic::eh_sjlj_setjmp:
1598     SDValue Val = Subtarget->isThumb() ?
1599       DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
1600       DAG.getConstant(0, MVT::i32);
1601     return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1),
1602                        Val);
1603   }
1604 }
1605
1606 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
1607                           const ARMSubtarget *Subtarget) {
1608   DebugLoc dl = Op.getDebugLoc();
1609   SDValue Op5 = Op.getOperand(5);
1610   SDValue Res;
1611   unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
1612   if (isDeviceBarrier) {
1613     if (Subtarget->hasV7Ops())
1614       Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
1615     else
1616       Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
1617                         DAG.getConstant(0, MVT::i32));
1618   } else {
1619     if (Subtarget->hasV7Ops())
1620       Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
1621     else
1622       Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
1623                         DAG.getConstant(0, MVT::i32));
1624   }
1625   return Res;
1626 }
1627
1628 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
1629   MachineFunction &MF = DAG.getMachineFunction();
1630   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
1631
1632   // vastart just stores the address of the VarArgsFrameIndex slot into the
1633   // memory location argument.
1634   DebugLoc dl = Op.getDebugLoc();
1635   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1636   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1637   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1638   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
1639                       false, false, 0);
1640 }
1641
1642 SDValue
1643 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1644                                            SelectionDAG &DAG) const {
1645   SDNode *Node = Op.getNode();
1646   DebugLoc dl = Node->getDebugLoc();
1647   EVT VT = Node->getValueType(0);
1648   SDValue Chain = Op.getOperand(0);
1649   SDValue Size  = Op.getOperand(1);
1650   SDValue Align = Op.getOperand(2);
1651
1652   // Chain the dynamic stack allocation so that it doesn't modify the stack
1653   // pointer when other instructions are using the stack.
1654   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
1655
1656   unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
1657   unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
1658   if (AlignVal > StackAlign)
1659     // Do this now since selection pass cannot introduce new target
1660     // independent node.
1661     Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
1662
1663   // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
1664   // using a "add r, sp, r" instead. Negate the size now so we don't have to
1665   // do even more horrible hack later.
1666   MachineFunction &MF = DAG.getMachineFunction();
1667   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1668   if (AFI->isThumb1OnlyFunction()) {
1669     bool Negate = true;
1670     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
1671     if (C) {
1672       uint32_t Val = C->getZExtValue();
1673       if (Val <= 508 && ((Val & 3) == 0))
1674         Negate = false;
1675     }
1676     if (Negate)
1677       Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
1678   }
1679
1680   SDVTList VTList = DAG.getVTList(VT, MVT::Other);
1681   SDValue Ops1[] = { Chain, Size, Align };
1682   SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
1683   Chain = Res.getValue(1);
1684   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
1685                              DAG.getIntPtrConstant(0, true), SDValue());
1686   SDValue Ops2[] = { Res, Chain };
1687   return DAG.getMergeValues(Ops2, 2, dl);
1688 }
1689
1690 SDValue
1691 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
1692                                         SDValue &Root, SelectionDAG &DAG,
1693                                         DebugLoc dl) const {
1694   MachineFunction &MF = DAG.getMachineFunction();
1695   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1696
1697   TargetRegisterClass *RC;
1698   if (AFI->isThumb1OnlyFunction())
1699     RC = ARM::tGPRRegisterClass;
1700   else
1701     RC = ARM::GPRRegisterClass;
1702
1703   // Transform the arguments stored in physical registers into virtual ones.
1704   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1705   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1706
1707   SDValue ArgValue2;
1708   if (NextVA.isMemLoc()) {
1709     MachineFrameInfo *MFI = MF.getFrameInfo();
1710     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
1711
1712     // Create load node to retrieve arguments from the stack.
1713     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1714     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
1715                             PseudoSourceValue::getFixedStack(FI), 0,
1716                             false, false, 0);
1717   } else {
1718     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1719     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1720   }
1721
1722   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
1723 }
1724
1725 SDValue
1726 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
1727                                         CallingConv::ID CallConv, bool isVarArg,
1728                                         const SmallVectorImpl<ISD::InputArg>
1729                                           &Ins,
1730                                         DebugLoc dl, SelectionDAG &DAG,
1731                                         SmallVectorImpl<SDValue> &InVals)
1732                                           const {
1733
1734   MachineFunction &MF = DAG.getMachineFunction();
1735   MachineFrameInfo *MFI = MF.getFrameInfo();
1736
1737   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1738
1739   // Assign locations to all of the incoming arguments.
1740   SmallVector<CCValAssign, 16> ArgLocs;
1741   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1742                  *DAG.getContext());
1743   CCInfo.AnalyzeFormalArguments(Ins,
1744                                 CCAssignFnForNode(CallConv, /* Return*/ false,
1745                                                   isVarArg));
1746
1747   SmallVector<SDValue, 16> ArgValues;
1748
1749   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1750     CCValAssign &VA = ArgLocs[i];
1751
1752     // Arguments stored in registers.
1753     if (VA.isRegLoc()) {
1754       EVT RegVT = VA.getLocVT();
1755
1756       SDValue ArgValue;
1757       if (VA.needsCustom()) {
1758         // f64 and vector types are split up into multiple registers or
1759         // combinations of registers and stack slots.
1760         if (VA.getLocVT() == MVT::v2f64) {
1761           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
1762                                                    Chain, DAG, dl);
1763           VA = ArgLocs[++i]; // skip ahead to next loc
1764           SDValue ArgValue2;
1765           if (VA.isMemLoc()) {
1766             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
1767                                             true, false);
1768             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1769             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
1770                                     PseudoSourceValue::getFixedStack(FI), 0,
1771                                     false, false, 0);
1772           } else {
1773             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
1774                                              Chain, DAG, dl);
1775           }
1776           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1777           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
1778                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
1779           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
1780                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
1781         } else
1782           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
1783
1784       } else {
1785         TargetRegisterClass *RC;
1786
1787         if (RegVT == MVT::f32)
1788           RC = ARM::SPRRegisterClass;
1789         else if (RegVT == MVT::f64)
1790           RC = ARM::DPRRegisterClass;
1791         else if (RegVT == MVT::v2f64)
1792           RC = ARM::QPRRegisterClass;
1793         else if (RegVT == MVT::i32)
1794           RC = (AFI->isThumb1OnlyFunction() ?
1795                 ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
1796         else
1797           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
1798
1799         // Transform the arguments in physical registers into virtual ones.
1800         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1801         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1802       }
1803
1804       // If this is an 8 or 16-bit value, it is really passed promoted
1805       // to 32 bits.  Insert an assert[sz]ext to capture this, then
1806       // truncate to the right size.
1807       switch (VA.getLocInfo()) {
1808       default: llvm_unreachable("Unknown loc info!");
1809       case CCValAssign::Full: break;
1810       case CCValAssign::BCvt:
1811         ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
1812         break;
1813       case CCValAssign::SExt:
1814         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1815                                DAG.getValueType(VA.getValVT()));
1816         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1817         break;
1818       case CCValAssign::ZExt:
1819         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1820                                DAG.getValueType(VA.getValVT()));
1821         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1822         break;
1823       }
1824
1825       InVals.push_back(ArgValue);
1826
1827     } else { // VA.isRegLoc()
1828
1829       // sanity check
1830       assert(VA.isMemLoc());
1831       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
1832
1833       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
1834       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
1835                                       true, false);
1836
1837       // Create load nodes to retrieve arguments from the stack.
1838       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1839       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
1840                                    PseudoSourceValue::getFixedStack(FI), 0,
1841                                    false, false, 0));
1842     }
1843   }
1844
1845   // varargs
1846   if (isVarArg) {
1847     static const unsigned GPRArgRegs[] = {
1848       ARM::R0, ARM::R1, ARM::R2, ARM::R3
1849     };
1850
1851     unsigned NumGPRs = CCInfo.getFirstUnallocated
1852       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
1853
1854     unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
1855     unsigned VARegSize = (4 - NumGPRs) * 4;
1856     unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
1857     unsigned ArgOffset = CCInfo.getNextStackOffset();
1858     if (VARegSaveSize) {
1859       // If this function is vararg, store any remaining integer argument regs
1860       // to their spots on the stack so that they may be loaded by deferencing
1861       // the result of va_next.
1862       AFI->setVarArgsRegSaveSize(VARegSaveSize);
1863       AFI->setVarArgsFrameIndex(
1864         MFI->CreateFixedObject(VARegSaveSize,
1865                                ArgOffset + VARegSaveSize - VARegSize,
1866                                true, false));
1867       SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
1868                                       getPointerTy());
1869
1870       SmallVector<SDValue, 4> MemOps;
1871       for (; NumGPRs < 4; ++NumGPRs) {
1872         TargetRegisterClass *RC;
1873         if (AFI->isThumb1OnlyFunction())
1874           RC = ARM::tGPRRegisterClass;
1875         else
1876           RC = ARM::GPRRegisterClass;
1877
1878         unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
1879         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
1880         SDValue Store =
1881           DAG.getStore(Val.getValue(1), dl, Val, FIN,
1882                        PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0,
1883                        false, false, 0);
1884         MemOps.push_back(Store);
1885         FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
1886                           DAG.getConstant(4, getPointerTy()));
1887       }
1888       if (!MemOps.empty())
1889         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1890                             &MemOps[0], MemOps.size());
1891     } else
1892       // This will point to the next argument passed via stack.
1893       AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
1894                                                        true, false));
1895   }
1896
1897   return Chain;
1898 }
1899
1900 /// isFloatingPointZero - Return true if this is +0.0.
1901 static bool isFloatingPointZero(SDValue Op) {
1902   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1903     return CFP->getValueAPF().isPosZero();
1904   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1905     // Maybe this has already been legalized into the constant pool?
1906     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
1907       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
1908       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
1909         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1910           return CFP->getValueAPF().isPosZero();
1911     }
1912   }
1913   return false;
1914 }
1915
1916 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
1917 /// the given operands.
1918 SDValue
1919 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1920                              SDValue &ARMCC, SelectionDAG &DAG,
1921                              DebugLoc dl) const {
1922   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1923     unsigned C = RHSC->getZExtValue();
1924     if (!isLegalICmpImmediate(C)) {
1925       // Constant does not fit, try adjusting it by one?
1926       switch (CC) {
1927       default: break;
1928       case ISD::SETLT:
1929       case ISD::SETGE:
1930         if (isLegalICmpImmediate(C-1)) {
1931           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1932           RHS = DAG.getConstant(C-1, MVT::i32);
1933         }
1934         break;
1935       case ISD::SETULT:
1936       case ISD::SETUGE:
1937         if (C > 0 && isLegalICmpImmediate(C-1)) {
1938           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1939           RHS = DAG.getConstant(C-1, MVT::i32);
1940         }
1941         break;
1942       case ISD::SETLE:
1943       case ISD::SETGT:
1944         if (isLegalICmpImmediate(C+1)) {
1945           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1946           RHS = DAG.getConstant(C+1, MVT::i32);
1947         }
1948         break;
1949       case ISD::SETULE:
1950       case ISD::SETUGT:
1951         if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
1952           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1953           RHS = DAG.getConstant(C+1, MVT::i32);
1954         }
1955         break;
1956       }
1957     }
1958   }
1959
1960   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
1961   ARMISD::NodeType CompareType;
1962   switch (CondCode) {
1963   default:
1964     CompareType = ARMISD::CMP;
1965     break;
1966   case ARMCC::EQ:
1967   case ARMCC::NE:
1968     // Uses only Z Flag
1969     CompareType = ARMISD::CMPZ;
1970     break;
1971   }
1972   ARMCC = DAG.getConstant(CondCode, MVT::i32);
1973   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
1974 }
1975
1976 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
1977 static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
1978                          DebugLoc dl) {
1979   SDValue Cmp;
1980   if (!isFloatingPointZero(RHS))
1981     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
1982   else
1983     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
1984   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
1985 }
1986
1987 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
1988   EVT VT = Op.getValueType();
1989   SDValue LHS = Op.getOperand(0);
1990   SDValue RHS = Op.getOperand(1);
1991   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
1992   SDValue TrueVal = Op.getOperand(2);
1993   SDValue FalseVal = Op.getOperand(3);
1994   DebugLoc dl = Op.getDebugLoc();
1995
1996   if (LHS.getValueType() == MVT::i32) {
1997     SDValue ARMCC;
1998     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1999     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2000     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
2001   }
2002
2003   ARMCC::CondCodes CondCode, CondCode2;
2004   FPCCToARMCC(CC, CondCode, CondCode2);
2005
2006   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2007   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2008   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2009   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2010                                  ARMCC, CCR, Cmp);
2011   if (CondCode2 != ARMCC::AL) {
2012     SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
2013     // FIXME: Needs another CMP because flag can have but one use.
2014     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2015     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2016                          Result, TrueVal, ARMCC2, CCR, Cmp2);
2017   }
2018   return Result;
2019 }
2020
2021 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2022   SDValue  Chain = Op.getOperand(0);
2023   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2024   SDValue    LHS = Op.getOperand(2);
2025   SDValue    RHS = Op.getOperand(3);
2026   SDValue   Dest = Op.getOperand(4);
2027   DebugLoc dl = Op.getDebugLoc();
2028
2029   if (LHS.getValueType() == MVT::i32) {
2030     SDValue ARMCC;
2031     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2032     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2033     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2034                        Chain, Dest, ARMCC, CCR,Cmp);
2035   }
2036
2037   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2038   ARMCC::CondCodes CondCode, CondCode2;
2039   FPCCToARMCC(CC, CondCode, CondCode2);
2040
2041   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2042   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2043   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2044   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2045   SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
2046   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2047   if (CondCode2 != ARMCC::AL) {
2048     ARMCC = DAG.getConstant(CondCode2, MVT::i32);
2049     SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
2050     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2051   }
2052   return Res;
2053 }
2054
2055 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2056   SDValue Chain = Op.getOperand(0);
2057   SDValue Table = Op.getOperand(1);
2058   SDValue Index = Op.getOperand(2);
2059   DebugLoc dl = Op.getDebugLoc();
2060
2061   EVT PTy = getPointerTy();
2062   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2063   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2064   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2065   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2066   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2067   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2068   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2069   if (Subtarget->isThumb2()) {
2070     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2071     // which does another jump to the destination. This also makes it easier
2072     // to translate it to TBB / TBH later.
2073     // FIXME: This might not work if the function is extremely large.
2074     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2075                        Addr, Op.getOperand(2), JTI, UId);
2076   }
2077   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2078     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2079                        PseudoSourceValue::getJumpTable(), 0,
2080                        false, false, 0);
2081     Chain = Addr.getValue(1);
2082     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2083     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2084   } else {
2085     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2086                        PseudoSourceValue::getJumpTable(), 0, false, false, 0);
2087     Chain = Addr.getValue(1);
2088     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2089   }
2090 }
2091
2092 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2093   DebugLoc dl = Op.getDebugLoc();
2094   unsigned Opc;
2095
2096   switch (Op.getOpcode()) {
2097   default:
2098     assert(0 && "Invalid opcode!");
2099   case ISD::FP_TO_SINT:
2100     Opc = ARMISD::FTOSI;
2101     break;
2102   case ISD::FP_TO_UINT:
2103     Opc = ARMISD::FTOUI;
2104     break;
2105   }
2106   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2107   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2108 }
2109
2110 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2111   EVT VT = Op.getValueType();
2112   DebugLoc dl = Op.getDebugLoc();
2113   unsigned Opc;
2114
2115   switch (Op.getOpcode()) {
2116   default:
2117     assert(0 && "Invalid opcode!");
2118   case ISD::SINT_TO_FP:
2119     Opc = ARMISD::SITOF;
2120     break;
2121   case ISD::UINT_TO_FP:
2122     Opc = ARMISD::UITOF;
2123     break;
2124   }
2125
2126   Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2127   return DAG.getNode(Opc, dl, VT, Op);
2128 }
2129
2130 static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
2131   // Implement fcopysign with a fabs and a conditional fneg.
2132   SDValue Tmp0 = Op.getOperand(0);
2133   SDValue Tmp1 = Op.getOperand(1);
2134   DebugLoc dl = Op.getDebugLoc();
2135   EVT VT = Op.getValueType();
2136   EVT SrcVT = Tmp1.getValueType();
2137   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2138   SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
2139   SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
2140   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2141   return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
2142 }
2143
2144 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2145   MachineFunction &MF = DAG.getMachineFunction();
2146   MachineFrameInfo *MFI = MF.getFrameInfo();
2147   MFI->setReturnAddressIsTaken(true);
2148
2149   EVT VT = Op.getValueType();
2150   DebugLoc dl = Op.getDebugLoc();
2151   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2152   if (Depth) {
2153     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2154     SDValue Offset = DAG.getConstant(4, MVT::i32);
2155     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2156                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2157                        NULL, 0, false, false, 0);
2158   }
2159
2160   // Return LR, which contains the return address. Mark it an implicit live-in.
2161   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2162   TargetRegisterClass *RC = AFI->isThumb1OnlyFunction()
2163     ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
2164   unsigned Reg = MF.addLiveIn(ARM::LR, RC);
2165   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2166 }
2167
2168 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2169   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2170   MFI->setFrameAddressIsTaken(true);
2171
2172   EVT VT = Op.getValueType();
2173   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2174   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2175   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2176     ? ARM::R7 : ARM::R11;
2177   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2178   while (Depth--)
2179     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
2180                             false, false, 0);
2181   return FrameAddr;
2182 }
2183
2184 /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2185 /// expand a bit convert where either the source or destination type is i64 to
2186 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2187 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
2188 /// vectors), since the legalizer won't know what to do with that.
2189 static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2190   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2191   DebugLoc dl = N->getDebugLoc();
2192   SDValue Op = N->getOperand(0);
2193
2194   // This function is only supposed to be called for i64 types, either as the
2195   // source or destination of the bit convert.
2196   EVT SrcVT = Op.getValueType();
2197   EVT DstVT = N->getValueType(0);
2198   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2199          "ExpandBIT_CONVERT called for non-i64 type");
2200
2201   // Turn i64->f64 into VMOVDRR.
2202   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2203     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2204                              DAG.getConstant(0, MVT::i32));
2205     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2206                              DAG.getConstant(1, MVT::i32));
2207     return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2208   }
2209
2210   // Turn f64->i64 into VMOVRRD.
2211   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2212     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2213                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2214     // Merge the pieces into a single i64 value.
2215     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2216   }
2217
2218   return SDValue();
2219 }
2220
2221 /// getZeroVector - Returns a vector of specified type with all zero elements.
2222 ///
2223 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2224   assert(VT.isVector() && "Expected a vector type");
2225
2226   // Zero vectors are used to represent vector negation and in those cases
2227   // will be implemented with the NEON VNEG instruction.  However, VNEG does
2228   // not support i64 elements, so sometimes the zero vectors will need to be
2229   // explicitly constructed.  For those cases, and potentially other uses in
2230   // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
2231   // to their dest type.  This ensures they get CSE'd.
2232   SDValue Vec;
2233   SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
2234   SmallVector<SDValue, 8> Ops;
2235   MVT TVT;
2236
2237   if (VT.getSizeInBits() == 64) {
2238     Ops.assign(8, Cst); TVT = MVT::v8i8;
2239   } else {
2240     Ops.assign(16, Cst); TVT = MVT::v16i8;
2241   }
2242   Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2243
2244   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2245 }
2246
2247 /// getOnesVector - Returns a vector of specified type with all bits set.
2248 ///
2249 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2250   assert(VT.isVector() && "Expected a vector type");
2251
2252   // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
2253   // dest type. This ensures they get CSE'd.
2254   SDValue Vec;
2255   SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
2256   SmallVector<SDValue, 8> Ops;
2257   MVT TVT;
2258
2259   if (VT.getSizeInBits() == 64) {
2260     Ops.assign(8, Cst); TVT = MVT::v8i8;
2261   } else {
2262     Ops.assign(16, Cst); TVT = MVT::v16i8;
2263   }
2264   Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2265
2266   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2267 }
2268
2269 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
2270 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2271 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2272                                                 SelectionDAG &DAG) const {
2273   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2274   EVT VT = Op.getValueType();
2275   unsigned VTBits = VT.getSizeInBits();
2276   DebugLoc dl = Op.getDebugLoc();
2277   SDValue ShOpLo = Op.getOperand(0);
2278   SDValue ShOpHi = Op.getOperand(1);
2279   SDValue ShAmt  = Op.getOperand(2);
2280   SDValue ARMCC;
2281   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2282
2283   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2284
2285   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2286                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
2287   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2288   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2289                                    DAG.getConstant(VTBits, MVT::i32));
2290   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2291   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2292   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2293
2294   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2295   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2296                           ARMCC, DAG, dl);
2297   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2298   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
2299                            CCR, Cmp);
2300
2301   SDValue Ops[2] = { Lo, Hi };
2302   return DAG.getMergeValues(Ops, 2, dl);
2303 }
2304
2305 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2306 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2307 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2308                                                SelectionDAG &DAG) const {
2309   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2310   EVT VT = Op.getValueType();
2311   unsigned VTBits = VT.getSizeInBits();
2312   DebugLoc dl = Op.getDebugLoc();
2313   SDValue ShOpLo = Op.getOperand(0);
2314   SDValue ShOpHi = Op.getOperand(1);
2315   SDValue ShAmt  = Op.getOperand(2);
2316   SDValue ARMCC;
2317
2318   assert(Op.getOpcode() == ISD::SHL_PARTS);
2319   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2320                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
2321   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2322   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2323                                    DAG.getConstant(VTBits, MVT::i32));
2324   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2325   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2326
2327   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2328   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2329   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2330                           ARMCC, DAG, dl);
2331   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2332   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
2333                            CCR, Cmp);
2334
2335   SDValue Ops[2] = { Lo, Hi };
2336   return DAG.getMergeValues(Ops, 2, dl);
2337 }
2338
2339 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2340                          const ARMSubtarget *ST) {
2341   EVT VT = N->getValueType(0);
2342   DebugLoc dl = N->getDebugLoc();
2343
2344   if (!ST->hasV6T2Ops())
2345     return SDValue();
2346
2347   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2348   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2349 }
2350
2351 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2352                           const ARMSubtarget *ST) {
2353   EVT VT = N->getValueType(0);
2354   DebugLoc dl = N->getDebugLoc();
2355
2356   // Lower vector shifts on NEON to use VSHL.
2357   if (VT.isVector()) {
2358     assert(ST->hasNEON() && "unexpected vector shift");
2359
2360     // Left shifts translate directly to the vshiftu intrinsic.
2361     if (N->getOpcode() == ISD::SHL)
2362       return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2363                          DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2364                          N->getOperand(0), N->getOperand(1));
2365
2366     assert((N->getOpcode() == ISD::SRA ||
2367             N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2368
2369     // NEON uses the same intrinsics for both left and right shifts.  For
2370     // right shifts, the shift amounts are negative, so negate the vector of
2371     // shift amounts.
2372     EVT ShiftVT = N->getOperand(1).getValueType();
2373     SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2374                                        getZeroVector(ShiftVT, DAG, dl),
2375                                        N->getOperand(1));
2376     Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2377                                Intrinsic::arm_neon_vshifts :
2378                                Intrinsic::arm_neon_vshiftu);
2379     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2380                        DAG.getConstant(vshiftInt, MVT::i32),
2381                        N->getOperand(0), NegatedCount);
2382   }
2383
2384   // We can get here for a node like i32 = ISD::SHL i32, i64
2385   if (VT != MVT::i64)
2386     return SDValue();
2387
2388   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2389          "Unknown shift to lower!");
2390
2391   // We only lower SRA, SRL of 1 here, all others use generic lowering.
2392   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2393       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2394     return SDValue();
2395
2396   // If we are in thumb mode, we don't have RRX.
2397   if (ST->isThumb1Only()) return SDValue();
2398
2399   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
2400   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2401                              DAG.getConstant(0, MVT::i32));
2402   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2403                              DAG.getConstant(1, MVT::i32));
2404
2405   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2406   // captures the result into a carry flag.
2407   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2408   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2409
2410   // The low part is an ARMISD::RRX operand, which shifts the carry in.
2411   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2412
2413   // Merge the pieces into a single i64 value.
2414  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2415 }
2416
2417 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2418   SDValue TmpOp0, TmpOp1;
2419   bool Invert = false;
2420   bool Swap = false;
2421   unsigned Opc = 0;
2422
2423   SDValue Op0 = Op.getOperand(0);
2424   SDValue Op1 = Op.getOperand(1);
2425   SDValue CC = Op.getOperand(2);
2426   EVT VT = Op.getValueType();
2427   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2428   DebugLoc dl = Op.getDebugLoc();
2429
2430   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2431     switch (SetCCOpcode) {
2432     default: llvm_unreachable("Illegal FP comparison"); break;
2433     case ISD::SETUNE:
2434     case ISD::SETNE:  Invert = true; // Fallthrough
2435     case ISD::SETOEQ:
2436     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2437     case ISD::SETOLT:
2438     case ISD::SETLT: Swap = true; // Fallthrough
2439     case ISD::SETOGT:
2440     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2441     case ISD::SETOLE:
2442     case ISD::SETLE:  Swap = true; // Fallthrough
2443     case ISD::SETOGE:
2444     case ISD::SETGE: Opc = ARMISD::VCGE; break;
2445     case ISD::SETUGE: Swap = true; // Fallthrough
2446     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2447     case ISD::SETUGT: Swap = true; // Fallthrough
2448     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2449     case ISD::SETUEQ: Invert = true; // Fallthrough
2450     case ISD::SETONE:
2451       // Expand this to (OLT | OGT).
2452       TmpOp0 = Op0;
2453       TmpOp1 = Op1;
2454       Opc = ISD::OR;
2455       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2456       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2457       break;
2458     case ISD::SETUO: Invert = true; // Fallthrough
2459     case ISD::SETO:
2460       // Expand this to (OLT | OGE).
2461       TmpOp0 = Op0;
2462       TmpOp1 = Op1;
2463       Opc = ISD::OR;
2464       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2465       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2466       break;
2467     }
2468   } else {
2469     // Integer comparisons.
2470     switch (SetCCOpcode) {
2471     default: llvm_unreachable("Illegal integer comparison"); break;
2472     case ISD::SETNE:  Invert = true;
2473     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2474     case ISD::SETLT:  Swap = true;
2475     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2476     case ISD::SETLE:  Swap = true;
2477     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
2478     case ISD::SETULT: Swap = true;
2479     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2480     case ISD::SETULE: Swap = true;
2481     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2482     }
2483
2484     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2485     if (Opc == ARMISD::VCEQ) {
2486
2487       SDValue AndOp;
2488       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2489         AndOp = Op0;
2490       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2491         AndOp = Op1;
2492
2493       // Ignore bitconvert.
2494       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
2495         AndOp = AndOp.getOperand(0);
2496
2497       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
2498         Opc = ARMISD::VTST;
2499         Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
2500         Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
2501         Invert = !Invert;
2502       }
2503     }
2504   }
2505
2506   if (Swap)
2507     std::swap(Op0, Op1);
2508
2509   SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
2510
2511   if (Invert)
2512     Result = DAG.getNOT(dl, Result, VT);
2513
2514   return Result;
2515 }
2516
2517 /// isVMOVSplat - Check if the specified splat value corresponds to an immediate
2518 /// VMOV instruction, and if so, return the constant being splatted.
2519 static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
2520                            unsigned SplatBitSize, SelectionDAG &DAG) {
2521   switch (SplatBitSize) {
2522   case 8:
2523     // Any 1-byte value is OK.
2524     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
2525     return DAG.getTargetConstant(SplatBits, MVT::i8);
2526
2527   case 16:
2528     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
2529     if ((SplatBits & ~0xff) == 0 ||
2530         (SplatBits & ~0xff00) == 0)
2531       return DAG.getTargetConstant(SplatBits, MVT::i16);
2532     break;
2533
2534   case 32:
2535     // NEON's 32-bit VMOV supports splat values where:
2536     // * only one byte is nonzero, or
2537     // * the least significant byte is 0xff and the second byte is nonzero, or
2538     // * the least significant 2 bytes are 0xff and the third is nonzero.
2539     if ((SplatBits & ~0xff) == 0 ||
2540         (SplatBits & ~0xff00) == 0 ||
2541         (SplatBits & ~0xff0000) == 0 ||
2542         (SplatBits & ~0xff000000) == 0)
2543       return DAG.getTargetConstant(SplatBits, MVT::i32);
2544
2545     if ((SplatBits & ~0xffff) == 0 &&
2546         ((SplatBits | SplatUndef) & 0xff) == 0xff)
2547       return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
2548
2549     if ((SplatBits & ~0xffffff) == 0 &&
2550         ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
2551       return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
2552
2553     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
2554     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
2555     // VMOV.I32.  A (very) minor optimization would be to replicate the value
2556     // and fall through here to test for a valid 64-bit splat.  But, then the
2557     // caller would also need to check and handle the change in size.
2558     break;
2559
2560   case 64: {
2561     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
2562     uint64_t BitMask = 0xff;
2563     uint64_t Val = 0;
2564     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
2565       if (((SplatBits | SplatUndef) & BitMask) == BitMask)
2566         Val |= BitMask;
2567       else if ((SplatBits & BitMask) != 0)
2568         return SDValue();
2569       BitMask <<= 8;
2570     }
2571     return DAG.getTargetConstant(Val, MVT::i64);
2572   }
2573
2574   default:
2575     llvm_unreachable("unexpected size for isVMOVSplat");
2576     break;
2577   }
2578
2579   return SDValue();
2580 }
2581
2582 /// getVMOVImm - If this is a build_vector of constants which can be
2583 /// formed by using a VMOV instruction of the specified element size,
2584 /// return the constant being splatted.  The ByteSize field indicates the
2585 /// number of bytes of each element [1248].
2586 SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2587   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
2588   APInt SplatBits, SplatUndef;
2589   unsigned SplatBitSize;
2590   bool HasAnyUndefs;
2591   if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
2592                                       HasAnyUndefs, ByteSize * 8))
2593     return SDValue();
2594
2595   if (SplatBitSize > ByteSize * 8)
2596     return SDValue();
2597
2598   return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
2599                      SplatBitSize, DAG);
2600 }
2601
2602 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
2603                        bool &ReverseVEXT, unsigned &Imm) {
2604   unsigned NumElts = VT.getVectorNumElements();
2605   ReverseVEXT = false;
2606   Imm = M[0];
2607
2608   // If this is a VEXT shuffle, the immediate value is the index of the first
2609   // element.  The other shuffle indices must be the successive elements after
2610   // the first one.
2611   unsigned ExpectedElt = Imm;
2612   for (unsigned i = 1; i < NumElts; ++i) {
2613     // Increment the expected index.  If it wraps around, it may still be
2614     // a VEXT but the source vectors must be swapped.
2615     ExpectedElt += 1;
2616     if (ExpectedElt == NumElts * 2) {
2617       ExpectedElt = 0;
2618       ReverseVEXT = true;
2619     }
2620
2621     if (ExpectedElt != static_cast<unsigned>(M[i]))
2622       return false;
2623   }
2624
2625   // Adjust the index value if the source operands will be swapped.
2626   if (ReverseVEXT)
2627     Imm -= NumElts;
2628
2629   return true;
2630 }
2631
2632 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
2633 /// instruction with the specified blocksize.  (The order of the elements
2634 /// within each block of the vector is reversed.)
2635 static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
2636                        unsigned BlockSize) {
2637   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
2638          "Only possible block sizes for VREV are: 16, 32, 64");
2639
2640   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2641   if (EltSz == 64)
2642     return false;
2643
2644   unsigned NumElts = VT.getVectorNumElements();
2645   unsigned BlockElts = M[0] + 1;
2646
2647   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
2648     return false;
2649
2650   for (unsigned i = 0; i < NumElts; ++i) {
2651     if ((unsigned) M[i] !=
2652         (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
2653       return false;
2654   }
2655
2656   return true;
2657 }
2658
2659 static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
2660                        unsigned &WhichResult) {
2661   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2662   if (EltSz == 64)
2663     return false;
2664
2665   unsigned NumElts = VT.getVectorNumElements();
2666   WhichResult = (M[0] == 0 ? 0 : 1);
2667   for (unsigned i = 0; i < NumElts; i += 2) {
2668     if ((unsigned) M[i] != i + WhichResult ||
2669         (unsigned) M[i+1] != i + NumElts + WhichResult)
2670       return false;
2671   }
2672   return true;
2673 }
2674
2675 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
2676 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2677 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
2678 static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2679                                 unsigned &WhichResult) {
2680   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2681   if (EltSz == 64)
2682     return false;
2683
2684   unsigned NumElts = VT.getVectorNumElements();
2685   WhichResult = (M[0] == 0 ? 0 : 1);
2686   for (unsigned i = 0; i < NumElts; i += 2) {
2687     if ((unsigned) M[i] != i + WhichResult ||
2688         (unsigned) M[i+1] != i + WhichResult)
2689       return false;
2690   }
2691   return true;
2692 }
2693
2694 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
2695                        unsigned &WhichResult) {
2696   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2697   if (EltSz == 64)
2698     return false;
2699
2700   unsigned NumElts = VT.getVectorNumElements();
2701   WhichResult = (M[0] == 0 ? 0 : 1);
2702   for (unsigned i = 0; i != NumElts; ++i) {
2703     if ((unsigned) M[i] != 2 * i + WhichResult)
2704       return false;
2705   }
2706
2707   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2708   if (VT.is64BitVector() && EltSz == 32)
2709     return false;
2710
2711   return true;
2712 }
2713
2714 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
2715 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2716 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
2717 static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2718                                 unsigned &WhichResult) {
2719   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2720   if (EltSz == 64)
2721     return false;
2722
2723   unsigned Half = VT.getVectorNumElements() / 2;
2724   WhichResult = (M[0] == 0 ? 0 : 1);
2725   for (unsigned j = 0; j != 2; ++j) {
2726     unsigned Idx = WhichResult;
2727     for (unsigned i = 0; i != Half; ++i) {
2728       if ((unsigned) M[i + j * Half] != Idx)
2729         return false;
2730       Idx += 2;
2731     }
2732   }
2733
2734   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2735   if (VT.is64BitVector() && EltSz == 32)
2736     return false;
2737
2738   return true;
2739 }
2740
2741 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
2742                        unsigned &WhichResult) {
2743   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2744   if (EltSz == 64)
2745     return false;
2746
2747   unsigned NumElts = VT.getVectorNumElements();
2748   WhichResult = (M[0] == 0 ? 0 : 1);
2749   unsigned Idx = WhichResult * NumElts / 2;
2750   for (unsigned i = 0; i != NumElts; i += 2) {
2751     if ((unsigned) M[i] != Idx ||
2752         (unsigned) M[i+1] != Idx + NumElts)
2753       return false;
2754     Idx += 1;
2755   }
2756
2757   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2758   if (VT.is64BitVector() && EltSz == 32)
2759     return false;
2760
2761   return true;
2762 }
2763
2764 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
2765 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2766 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
2767 static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2768                                 unsigned &WhichResult) {
2769   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2770   if (EltSz == 64)
2771     return false;
2772
2773   unsigned NumElts = VT.getVectorNumElements();
2774   WhichResult = (M[0] == 0 ? 0 : 1);
2775   unsigned Idx = WhichResult * NumElts / 2;
2776   for (unsigned i = 0; i != NumElts; i += 2) {
2777     if ((unsigned) M[i] != Idx ||
2778         (unsigned) M[i+1] != Idx)
2779       return false;
2780     Idx += 1;
2781   }
2782
2783   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2784   if (VT.is64BitVector() && EltSz == 32)
2785     return false;
2786
2787   return true;
2788 }
2789
2790
2791 static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2792   // Canonicalize all-zeros and all-ones vectors.
2793   ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
2794   if (ConstVal->isNullValue())
2795     return getZeroVector(VT, DAG, dl);
2796   if (ConstVal->isAllOnesValue())
2797     return getOnesVector(VT, DAG, dl);
2798
2799   EVT CanonicalVT;
2800   if (VT.is64BitVector()) {
2801     switch (Val.getValueType().getSizeInBits()) {
2802     case 8:  CanonicalVT = MVT::v8i8; break;
2803     case 16: CanonicalVT = MVT::v4i16; break;
2804     case 32: CanonicalVT = MVT::v2i32; break;
2805     case 64: CanonicalVT = MVT::v1i64; break;
2806     default: llvm_unreachable("unexpected splat element type"); break;
2807     }
2808   } else {
2809     assert(VT.is128BitVector() && "unknown splat vector size");
2810     switch (Val.getValueType().getSizeInBits()) {
2811     case 8:  CanonicalVT = MVT::v16i8; break;
2812     case 16: CanonicalVT = MVT::v8i16; break;
2813     case 32: CanonicalVT = MVT::v4i32; break;
2814     case 64: CanonicalVT = MVT::v2i64; break;
2815     default: llvm_unreachable("unexpected splat element type"); break;
2816     }
2817   }
2818
2819   // Build a canonical splat for this value.
2820   SmallVector<SDValue, 8> Ops;
2821   Ops.assign(CanonicalVT.getVectorNumElements(), Val);
2822   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
2823                             Ops.size());
2824   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
2825 }
2826
2827 // If this is a case we can't handle, return null and let the default
2828 // expansion code take care of it.
2829 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
2830   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
2831   DebugLoc dl = Op.getDebugLoc();
2832   EVT VT = Op.getValueType();
2833
2834   APInt SplatBits, SplatUndef;
2835   unsigned SplatBitSize;
2836   bool HasAnyUndefs;
2837   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
2838     if (SplatBitSize <= 64) {
2839       SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
2840                                 SplatUndef.getZExtValue(), SplatBitSize, DAG);
2841       if (Val.getNode())
2842         return BuildSplat(Val, VT, DAG, dl);
2843     }
2844   }
2845
2846   // Scan through the operands to see if only one value is used.
2847   unsigned NumElts = VT.getVectorNumElements();
2848   bool isOnlyLowElement = true;
2849   bool usesOnlyOneValue = true;
2850   bool isConstant = true;
2851   SDValue Value;
2852   for (unsigned i = 0; i < NumElts; ++i) {
2853     SDValue V = Op.getOperand(i);
2854     if (V.getOpcode() == ISD::UNDEF)
2855       continue;
2856     if (i > 0)
2857       isOnlyLowElement = false;
2858     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
2859       isConstant = false;
2860
2861     if (!Value.getNode())
2862       Value = V;
2863     else if (V != Value)
2864       usesOnlyOneValue = false;
2865   }
2866
2867   if (!Value.getNode())
2868     return DAG.getUNDEF(VT);
2869
2870   if (isOnlyLowElement)
2871     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
2872
2873   // If all elements are constants, fall back to the default expansion, which
2874   // will generate a load from the constant pool.
2875   if (isConstant)
2876     return SDValue();
2877
2878   // Use VDUP for non-constant splats.
2879   if (usesOnlyOneValue)
2880     return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
2881
2882   // Vectors with 32- or 64-bit elements can be built by directly assigning
2883   // the subregisters.
2884   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2885   if (EltSize >= 32) {
2886     // Do the expansion with floating-point types, since that is what the VFP
2887     // registers are defined to use, and since i64 is not legal.
2888     EVT EltVT = EVT::getFloatingPointVT(EltSize);
2889     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
2890     SDValue Val = DAG.getUNDEF(VecVT);
2891     for (unsigned i = 0; i < NumElts; ++i) {
2892       SDValue Elt = Op.getOperand(i);
2893       if (Elt.getOpcode() == ISD::UNDEF)
2894         continue;
2895       Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
2896       Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
2897                         DAG.getConstant(i, MVT::i32));
2898     }
2899     return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
2900   }
2901
2902   return SDValue();
2903 }
2904
2905 /// isShuffleMaskLegal - Targets can use this to indicate that they only
2906 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
2907 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
2908 /// are assumed to be legal.
2909 bool
2910 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
2911                                       EVT VT) const {
2912   if (VT.getVectorNumElements() == 4 &&
2913       (VT.is128BitVector() || VT.is64BitVector())) {
2914     unsigned PFIndexes[4];
2915     for (unsigned i = 0; i != 4; ++i) {
2916       if (M[i] < 0)
2917         PFIndexes[i] = 8;
2918       else
2919         PFIndexes[i] = M[i];
2920     }
2921
2922     // Compute the index in the perfect shuffle table.
2923     unsigned PFTableIndex =
2924       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
2925     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
2926     unsigned Cost = (PFEntry >> 30);
2927
2928     if (Cost <= 4)
2929       return true;
2930   }
2931
2932   bool ReverseVEXT;
2933   unsigned Imm, WhichResult;
2934
2935   return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
2936           isVREVMask(M, VT, 64) ||
2937           isVREVMask(M, VT, 32) ||
2938           isVREVMask(M, VT, 16) ||
2939           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
2940           isVTRNMask(M, VT, WhichResult) ||
2941           isVUZPMask(M, VT, WhichResult) ||
2942           isVZIPMask(M, VT, WhichResult) ||
2943           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
2944           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
2945           isVZIP_v_undef_Mask(M, VT, WhichResult));
2946 }
2947
2948 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
2949 /// the specified operations to build the shuffle.
2950 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
2951                                       SDValue RHS, SelectionDAG &DAG,
2952                                       DebugLoc dl) {
2953   unsigned OpNum = (PFEntry >> 26) & 0x0F;
2954   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
2955   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
2956
2957   enum {
2958     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
2959     OP_VREV,
2960     OP_VDUP0,
2961     OP_VDUP1,
2962     OP_VDUP2,
2963     OP_VDUP3,
2964     OP_VEXT1,
2965     OP_VEXT2,
2966     OP_VEXT3,
2967     OP_VUZPL, // VUZP, left result
2968     OP_VUZPR, // VUZP, right result
2969     OP_VZIPL, // VZIP, left result
2970     OP_VZIPR, // VZIP, right result
2971     OP_VTRNL, // VTRN, left result
2972     OP_VTRNR  // VTRN, right result
2973   };
2974
2975   if (OpNum == OP_COPY) {
2976     if (LHSID == (1*9+2)*9+3) return LHS;
2977     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
2978     return RHS;
2979   }
2980
2981   SDValue OpLHS, OpRHS;
2982   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
2983   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
2984   EVT VT = OpLHS.getValueType();
2985
2986   switch (OpNum) {
2987   default: llvm_unreachable("Unknown shuffle opcode!");
2988   case OP_VREV:
2989     return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
2990   case OP_VDUP0:
2991   case OP_VDUP1:
2992   case OP_VDUP2:
2993   case OP_VDUP3:
2994     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
2995                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
2996   case OP_VEXT1:
2997   case OP_VEXT2:
2998   case OP_VEXT3:
2999     return DAG.getNode(ARMISD::VEXT, dl, VT,
3000                        OpLHS, OpRHS,
3001                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3002   case OP_VUZPL:
3003   case OP_VUZPR:
3004     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3005                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3006   case OP_VZIPL:
3007   case OP_VZIPR:
3008     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3009                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3010   case OP_VTRNL:
3011   case OP_VTRNR:
3012     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3013                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3014   }
3015 }
3016
3017 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3018   SDValue V1 = Op.getOperand(0);
3019   SDValue V2 = Op.getOperand(1);
3020   DebugLoc dl = Op.getDebugLoc();
3021   EVT VT = Op.getValueType();
3022   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3023   SmallVector<int, 8> ShuffleMask;
3024
3025   // Convert shuffles that are directly supported on NEON to target-specific
3026   // DAG nodes, instead of keeping them as shuffles and matching them again
3027   // during code selection.  This is more efficient and avoids the possibility
3028   // of inconsistencies between legalization and selection.
3029   // FIXME: floating-point vectors should be canonicalized to integer vectors
3030   // of the same time so that they get CSEd properly.
3031   SVN->getMask(ShuffleMask);
3032
3033   if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3034     int Lane = SVN->getSplatIndex();
3035     // If this is undef splat, generate it via "just" vdup, if possible.
3036     if (Lane == -1) Lane = 0;
3037
3038     if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3039       return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3040     }
3041     return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3042                        DAG.getConstant(Lane, MVT::i32));
3043   }
3044
3045   bool ReverseVEXT;
3046   unsigned Imm;
3047   if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3048     if (ReverseVEXT)
3049       std::swap(V1, V2);
3050     return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3051                        DAG.getConstant(Imm, MVT::i32));
3052   }
3053
3054   if (isVREVMask(ShuffleMask, VT, 64))
3055     return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3056   if (isVREVMask(ShuffleMask, VT, 32))
3057     return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3058   if (isVREVMask(ShuffleMask, VT, 16))
3059     return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3060
3061   // Check for Neon shuffles that modify both input vectors in place.
3062   // If both results are used, i.e., if there are two shuffles with the same
3063   // source operands and with masks corresponding to both results of one of
3064   // these operations, DAG memoization will ensure that a single node is
3065   // used for both shuffles.
3066   unsigned WhichResult;
3067   if (isVTRNMask(ShuffleMask, VT, WhichResult))
3068     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3069                        V1, V2).getValue(WhichResult);
3070   if (isVUZPMask(ShuffleMask, VT, WhichResult))
3071     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3072                        V1, V2).getValue(WhichResult);
3073   if (isVZIPMask(ShuffleMask, VT, WhichResult))
3074     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3075                        V1, V2).getValue(WhichResult);
3076
3077   if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3078     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3079                        V1, V1).getValue(WhichResult);
3080   if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3081     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3082                        V1, V1).getValue(WhichResult);
3083   if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3084     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3085                        V1, V1).getValue(WhichResult);
3086
3087   // If the shuffle is not directly supported and it has 4 elements, use
3088   // the PerfectShuffle-generated table to synthesize it from other shuffles.
3089   unsigned NumElts = VT.getVectorNumElements();
3090   if (NumElts == 4) {
3091     unsigned PFIndexes[4];
3092     for (unsigned i = 0; i != 4; ++i) {
3093       if (ShuffleMask[i] < 0)
3094         PFIndexes[i] = 8;
3095       else
3096         PFIndexes[i] = ShuffleMask[i];
3097     }
3098
3099     // Compute the index in the perfect shuffle table.
3100     unsigned PFTableIndex =
3101       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3102     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3103     unsigned Cost = (PFEntry >> 30);
3104
3105     if (Cost <= 4)
3106       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3107   }
3108
3109   // Implement shuffles with 32- or 64-bit elements as subreg copies.
3110   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3111   if (EltSize >= 32) {
3112     // Do the expansion with floating-point types, since that is what the VFP
3113     // registers are defined to use, and since i64 is not legal.
3114     EVT EltVT = EVT::getFloatingPointVT(EltSize);
3115     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3116     V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
3117     V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
3118     SDValue Val = DAG.getUNDEF(VecVT);
3119     for (unsigned i = 0; i < NumElts; ++i) {
3120       if (ShuffleMask[i] < 0)
3121         continue;
3122       SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3123                                 ShuffleMask[i] < (int)NumElts ? V1 : V2,
3124                                 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3125                                                 MVT::i32));
3126       Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
3127                         Elt, DAG.getConstant(i, MVT::i32));
3128     }
3129     return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3130   }
3131
3132   return SDValue();
3133 }
3134
3135 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3136   EVT VT = Op.getValueType();
3137   DebugLoc dl = Op.getDebugLoc();
3138   SDValue Vec = Op.getOperand(0);
3139   SDValue Lane = Op.getOperand(1);
3140   assert(VT == MVT::i32 &&
3141          Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3142          "unexpected type for custom-lowering vector extract");
3143   return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3144 }
3145
3146 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3147   // The only time a CONCAT_VECTORS operation can have legal types is when
3148   // two 64-bit vectors are concatenated to a 128-bit vector.
3149   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3150          "unexpected CONCAT_VECTORS");
3151   DebugLoc dl = Op.getDebugLoc();
3152   SDValue Val = DAG.getUNDEF(MVT::v2f64);
3153   SDValue Op0 = Op.getOperand(0);
3154   SDValue Op1 = Op.getOperand(1);
3155   if (Op0.getOpcode() != ISD::UNDEF)
3156     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3157                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3158                       DAG.getIntPtrConstant(0));
3159   if (Op1.getOpcode() != ISD::UNDEF)
3160     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3161                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3162                       DAG.getIntPtrConstant(1));
3163   return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3164 }
3165
3166 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3167   switch (Op.getOpcode()) {
3168   default: llvm_unreachable("Don't know how to custom lower this!");
3169   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
3170   case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
3171   case ISD::GlobalAddress:
3172     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3173       LowerGlobalAddressELF(Op, DAG);
3174   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3175   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
3176   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
3177   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
3178   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3179   case ISD::VASTART:       return LowerVASTART(Op, DAG);
3180   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
3181   case ISD::SINT_TO_FP:
3182   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
3183   case ISD::FP_TO_SINT:
3184   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
3185   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
3186   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
3187   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
3188   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3189   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
3190   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3191                                                                Subtarget);
3192   case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
3193   case ISD::SHL:
3194   case ISD::SRL:
3195   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
3196   case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
3197   case ISD::SRL_PARTS:
3198   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
3199   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3200   case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
3201   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG);
3202   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3203   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3204   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3205   }
3206   return SDValue();
3207 }
3208
3209 /// ReplaceNodeResults - Replace the results of node with an illegal result
3210 /// type with new values built out of custom code.
3211 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3212                                            SmallVectorImpl<SDValue>&Results,
3213                                            SelectionDAG &DAG) const {
3214   SDValue Res;
3215   switch (N->getOpcode()) {
3216   default:
3217     llvm_unreachable("Don't know how to custom expand this!");
3218     break;
3219   case ISD::BIT_CONVERT:
3220     Res = ExpandBIT_CONVERT(N, DAG);
3221     break;
3222   case ISD::SRL:
3223   case ISD::SRA:
3224     Res = LowerShift(N, DAG, Subtarget);
3225     break;
3226   }
3227   if (Res.getNode())
3228     Results.push_back(Res);
3229 }
3230
3231 //===----------------------------------------------------------------------===//
3232 //                           ARM Scheduler Hooks
3233 //===----------------------------------------------------------------------===//
3234
3235 MachineBasicBlock *
3236 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3237                                      MachineBasicBlock *BB,
3238                                      unsigned Size) const {
3239   unsigned dest    = MI->getOperand(0).getReg();
3240   unsigned ptr     = MI->getOperand(1).getReg();
3241   unsigned oldval  = MI->getOperand(2).getReg();
3242   unsigned newval  = MI->getOperand(3).getReg();
3243   unsigned scratch = BB->getParent()->getRegInfo()
3244     .createVirtualRegister(ARM::GPRRegisterClass);
3245   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3246   DebugLoc dl = MI->getDebugLoc();
3247   bool isThumb2 = Subtarget->isThumb2();
3248
3249   unsigned ldrOpc, strOpc;
3250   switch (Size) {
3251   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3252   case 1:
3253     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3254     strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
3255     break;
3256   case 2:
3257     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3258     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3259     break;
3260   case 4:
3261     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3262     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3263     break;
3264   }
3265
3266   MachineFunction *MF = BB->getParent();
3267   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3268   MachineFunction::iterator It = BB;
3269   ++It; // insert the new blocks after the current block
3270
3271   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3272   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3273   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3274   MF->insert(It, loop1MBB);
3275   MF->insert(It, loop2MBB);
3276   MF->insert(It, exitMBB);
3277   exitMBB->transferSuccessors(BB);
3278
3279   //  thisMBB:
3280   //   ...
3281   //   fallthrough --> loop1MBB
3282   BB->addSuccessor(loop1MBB);
3283
3284   // loop1MBB:
3285   //   ldrex dest, [ptr]
3286   //   cmp dest, oldval
3287   //   bne exitMBB
3288   BB = loop1MBB;
3289   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3290   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3291                  .addReg(dest).addReg(oldval));
3292   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3293     .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3294   BB->addSuccessor(loop2MBB);
3295   BB->addSuccessor(exitMBB);
3296
3297   // loop2MBB:
3298   //   strex scratch, newval, [ptr]
3299   //   cmp scratch, #0
3300   //   bne loop1MBB
3301   BB = loop2MBB;
3302   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3303                  .addReg(ptr));
3304   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3305                  .addReg(scratch).addImm(0));
3306   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3307     .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3308   BB->addSuccessor(loop1MBB);
3309   BB->addSuccessor(exitMBB);
3310
3311   //  exitMBB:
3312   //   ...
3313   BB = exitMBB;
3314
3315   MF->DeleteMachineInstr(MI);   // The instruction is gone now.
3316
3317   return BB;
3318 }
3319
3320 MachineBasicBlock *
3321 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3322                                     unsigned Size, unsigned BinOpcode) const {
3323   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3324   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3325
3326   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3327   MachineFunction *MF = BB->getParent();
3328   MachineFunction::iterator It = BB;
3329   ++It;
3330
3331   unsigned dest = MI->getOperand(0).getReg();
3332   unsigned ptr = MI->getOperand(1).getReg();
3333   unsigned incr = MI->getOperand(2).getReg();
3334   DebugLoc dl = MI->getDebugLoc();
3335
3336   bool isThumb2 = Subtarget->isThumb2();
3337   unsigned ldrOpc, strOpc;
3338   switch (Size) {
3339   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3340   case 1:
3341     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3342     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3343     break;
3344   case 2:
3345     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3346     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3347     break;
3348   case 4:
3349     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3350     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3351     break;
3352   }
3353
3354   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3355   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3356   MF->insert(It, loopMBB);
3357   MF->insert(It, exitMBB);
3358   exitMBB->transferSuccessors(BB);
3359
3360   MachineRegisterInfo &RegInfo = MF->getRegInfo();
3361   unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3362   unsigned scratch2 = (!BinOpcode) ? incr :
3363     RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3364
3365   //  thisMBB:
3366   //   ...
3367   //   fallthrough --> loopMBB
3368   BB->addSuccessor(loopMBB);
3369
3370   //  loopMBB:
3371   //   ldrex dest, ptr
3372   //   <binop> scratch2, dest, incr
3373   //   strex scratch, scratch2, ptr
3374   //   cmp scratch, #0
3375   //   bne- loopMBB
3376   //   fallthrough --> exitMBB
3377   BB = loopMBB;
3378   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3379   if (BinOpcode) {
3380     // operand order needs to go the other way for NAND
3381     if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
3382       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3383                      addReg(incr).addReg(dest)).addReg(0);
3384     else
3385       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3386                      addReg(dest).addReg(incr)).addReg(0);
3387   }
3388
3389   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
3390                  .addReg(ptr));
3391   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3392                  .addReg(scratch).addImm(0));
3393   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3394     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3395
3396   BB->addSuccessor(loopMBB);
3397   BB->addSuccessor(exitMBB);
3398
3399   //  exitMBB:
3400   //   ...
3401   BB = exitMBB;
3402
3403   MF->DeleteMachineInstr(MI);   // The instruction is gone now.
3404
3405   return BB;
3406 }
3407
3408 MachineBasicBlock *
3409 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
3410                                                MachineBasicBlock *BB) const {
3411   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3412   DebugLoc dl = MI->getDebugLoc();
3413   bool isThumb2 = Subtarget->isThumb2();
3414   switch (MI->getOpcode()) {
3415   default:
3416     MI->dump();
3417     llvm_unreachable("Unexpected instr type to insert");
3418
3419   case ARM::ATOMIC_LOAD_ADD_I8:
3420      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3421   case ARM::ATOMIC_LOAD_ADD_I16:
3422      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3423   case ARM::ATOMIC_LOAD_ADD_I32:
3424      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3425
3426   case ARM::ATOMIC_LOAD_AND_I8:
3427      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3428   case ARM::ATOMIC_LOAD_AND_I16:
3429      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3430   case ARM::ATOMIC_LOAD_AND_I32:
3431      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3432
3433   case ARM::ATOMIC_LOAD_OR_I8:
3434      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3435   case ARM::ATOMIC_LOAD_OR_I16:
3436      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3437   case ARM::ATOMIC_LOAD_OR_I32:
3438      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3439
3440   case ARM::ATOMIC_LOAD_XOR_I8:
3441      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3442   case ARM::ATOMIC_LOAD_XOR_I16:
3443      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3444   case ARM::ATOMIC_LOAD_XOR_I32:
3445      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3446
3447   case ARM::ATOMIC_LOAD_NAND_I8:
3448      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3449   case ARM::ATOMIC_LOAD_NAND_I16:
3450      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3451   case ARM::ATOMIC_LOAD_NAND_I32:
3452      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3453
3454   case ARM::ATOMIC_LOAD_SUB_I8:
3455      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3456   case ARM::ATOMIC_LOAD_SUB_I16:
3457      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3458   case ARM::ATOMIC_LOAD_SUB_I32:
3459      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3460
3461   case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
3462   case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
3463   case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
3464
3465   case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
3466   case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
3467   case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
3468
3469   case ARM::tMOVCCr_pseudo: {
3470     // To "insert" a SELECT_CC instruction, we actually have to insert the
3471     // diamond control-flow pattern.  The incoming instruction knows the
3472     // destination vreg to set, the condition code register to branch on, the
3473     // true/false values to select between, and a branch opcode to use.
3474     const BasicBlock *LLVM_BB = BB->getBasicBlock();
3475     MachineFunction::iterator It = BB;
3476     ++It;
3477
3478     //  thisMBB:
3479     //  ...
3480     //   TrueVal = ...
3481     //   cmpTY ccX, r1, r2
3482     //   bCC copy1MBB
3483     //   fallthrough --> copy0MBB
3484     MachineBasicBlock *thisMBB  = BB;
3485     MachineFunction *F = BB->getParent();
3486     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
3487     MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
3488     BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
3489       .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
3490     F->insert(It, copy0MBB);
3491     F->insert(It, sinkMBB);
3492     // Update machine-CFG edges by first adding all successors of the current
3493     // block to the new block which will contain the Phi node for the select.
3494     for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
3495            E = BB->succ_end(); I != E; ++I)
3496       sinkMBB->addSuccessor(*I);
3497     // Next, remove all successors of the current block, and add the true
3498     // and fallthrough blocks as its successors.
3499     while (!BB->succ_empty())
3500       BB->removeSuccessor(BB->succ_begin());
3501     BB->addSuccessor(copy0MBB);
3502     BB->addSuccessor(sinkMBB);
3503
3504     //  copy0MBB:
3505     //   %FalseValue = ...
3506     //   # fallthrough to sinkMBB
3507     BB = copy0MBB;
3508
3509     // Update machine-CFG edges
3510     BB->addSuccessor(sinkMBB);
3511
3512     //  sinkMBB:
3513     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
3514     //  ...
3515     BB = sinkMBB;
3516     BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
3517       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
3518       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
3519
3520     F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
3521     return BB;
3522   }
3523
3524   case ARM::tANDsp:
3525   case ARM::tADDspr_:
3526   case ARM::tSUBspi_:
3527   case ARM::t2SUBrSPi_:
3528   case ARM::t2SUBrSPi12_:
3529   case ARM::t2SUBrSPs_: {
3530     MachineFunction *MF = BB->getParent();
3531     unsigned DstReg = MI->getOperand(0).getReg();
3532     unsigned SrcReg = MI->getOperand(1).getReg();
3533     bool DstIsDead = MI->getOperand(0).isDead();
3534     bool SrcIsKill = MI->getOperand(1).isKill();
3535
3536     if (SrcReg != ARM::SP) {
3537       // Copy the source to SP from virtual register.
3538       const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
3539       unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3540         ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
3541       BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
3542         .addReg(SrcReg, getKillRegState(SrcIsKill));
3543     }
3544
3545     unsigned OpOpc = 0;
3546     bool NeedPred = false, NeedCC = false, NeedOp3 = false;
3547     switch (MI->getOpcode()) {
3548     default:
3549       llvm_unreachable("Unexpected pseudo instruction!");
3550     case ARM::tANDsp:
3551       OpOpc = ARM::tAND;
3552       NeedPred = true;
3553       break;
3554     case ARM::tADDspr_:
3555       OpOpc = ARM::tADDspr;
3556       break;
3557     case ARM::tSUBspi_:
3558       OpOpc = ARM::tSUBspi;
3559       break;
3560     case ARM::t2SUBrSPi_:
3561       OpOpc = ARM::t2SUBrSPi;
3562       NeedPred = true; NeedCC = true;
3563       break;
3564     case ARM::t2SUBrSPi12_:
3565       OpOpc = ARM::t2SUBrSPi12;
3566       NeedPred = true;
3567       break;
3568     case ARM::t2SUBrSPs_:
3569       OpOpc = ARM::t2SUBrSPs;
3570       NeedPred = true; NeedCC = true; NeedOp3 = true;
3571       break;
3572     }
3573     MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
3574     if (OpOpc == ARM::tAND)
3575       AddDefaultT1CC(MIB);
3576     MIB.addReg(ARM::SP);
3577     MIB.addOperand(MI->getOperand(2));
3578     if (NeedOp3)
3579       MIB.addOperand(MI->getOperand(3));
3580     if (NeedPred)
3581       AddDefaultPred(MIB);
3582     if (NeedCC)
3583       AddDefaultCC(MIB);
3584
3585     // Copy the result from SP to virtual register.
3586     const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
3587     unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3588       ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
3589     BuildMI(BB, dl, TII->get(CopyOpc))
3590       .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
3591       .addReg(ARM::SP);
3592     MF->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
3593     return BB;
3594   }
3595   }
3596 }
3597
3598 //===----------------------------------------------------------------------===//
3599 //                           ARM Optimization Hooks
3600 //===----------------------------------------------------------------------===//
3601
3602 static
3603 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
3604                             TargetLowering::DAGCombinerInfo &DCI) {
3605   SelectionDAG &DAG = DCI.DAG;
3606   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3607   EVT VT = N->getValueType(0);
3608   unsigned Opc = N->getOpcode();
3609   bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
3610   SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
3611   SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
3612   ISD::CondCode CC = ISD::SETCC_INVALID;
3613
3614   if (isSlctCC) {
3615     CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
3616   } else {
3617     SDValue CCOp = Slct.getOperand(0);
3618     if (CCOp.getOpcode() == ISD::SETCC)
3619       CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
3620   }
3621
3622   bool DoXform = false;
3623   bool InvCC = false;
3624   assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
3625           "Bad input!");
3626
3627   if (LHS.getOpcode() == ISD::Constant &&
3628       cast<ConstantSDNode>(LHS)->isNullValue()) {
3629     DoXform = true;
3630   } else if (CC != ISD::SETCC_INVALID &&
3631              RHS.getOpcode() == ISD::Constant &&
3632              cast<ConstantSDNode>(RHS)->isNullValue()) {
3633     std::swap(LHS, RHS);
3634     SDValue Op0 = Slct.getOperand(0);
3635     EVT OpVT = isSlctCC ? Op0.getValueType() :
3636                           Op0.getOperand(0).getValueType();
3637     bool isInt = OpVT.isInteger();
3638     CC = ISD::getSetCCInverse(CC, isInt);
3639
3640     if (!TLI.isCondCodeLegal(CC, OpVT))
3641       return SDValue();         // Inverse operator isn't legal.
3642
3643     DoXform = true;
3644     InvCC = true;
3645   }
3646
3647   if (DoXform) {
3648     SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
3649     if (isSlctCC)
3650       return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
3651                              Slct.getOperand(0), Slct.getOperand(1), CC);
3652     SDValue CCOp = Slct.getOperand(0);
3653     if (InvCC)
3654       CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
3655                           CCOp.getOperand(0), CCOp.getOperand(1), CC);
3656     return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
3657                        CCOp, OtherOp, Result);
3658   }
3659   return SDValue();
3660 }
3661
3662 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3663 static SDValue PerformADDCombine(SDNode *N,
3664                                  TargetLowering::DAGCombinerInfo &DCI) {
3665   // added by evan in r37685 with no testcase.
3666   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3667
3668   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
3669   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
3670     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
3671     if (Result.getNode()) return Result;
3672   }
3673   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
3674     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
3675     if (Result.getNode()) return Result;
3676   }
3677
3678   return SDValue();
3679 }
3680
3681 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
3682 static SDValue PerformSUBCombine(SDNode *N,
3683                                  TargetLowering::DAGCombinerInfo &DCI) {
3684   // added by evan in r37685 with no testcase.
3685   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3686
3687   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
3688   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
3689     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
3690     if (Result.getNode()) return Result;
3691   }
3692
3693   return SDValue();
3694 }
3695
3696 static SDValue PerformMULCombine(SDNode *N,
3697                                  TargetLowering::DAGCombinerInfo &DCI,
3698                                  const ARMSubtarget *Subtarget) {
3699   SelectionDAG &DAG = DCI.DAG;
3700
3701   if (Subtarget->isThumb1Only())
3702     return SDValue();
3703
3704   if (DAG.getMachineFunction().
3705       getFunction()->hasFnAttr(Attribute::OptimizeForSize))
3706     return SDValue();
3707
3708   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
3709     return SDValue();
3710
3711   EVT VT = N->getValueType(0);
3712   if (VT != MVT::i32)
3713     return SDValue();
3714
3715   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3716   if (!C)
3717     return SDValue();
3718
3719   uint64_t MulAmt = C->getZExtValue();
3720   unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
3721   ShiftAmt = ShiftAmt & (32 - 1);
3722   SDValue V = N->getOperand(0);
3723   DebugLoc DL = N->getDebugLoc();
3724
3725   SDValue Res;
3726   MulAmt >>= ShiftAmt;
3727   if (isPowerOf2_32(MulAmt - 1)) {
3728     // (mul x, 2^N + 1) => (add (shl x, N), x)
3729     Res = DAG.getNode(ISD::ADD, DL, VT,
3730                       V, DAG.getNode(ISD::SHL, DL, VT,
3731                                      V, DAG.getConstant(Log2_32(MulAmt-1),
3732                                                         MVT::i32)));
3733   } else if (isPowerOf2_32(MulAmt + 1)) {
3734     // (mul x, 2^N - 1) => (sub (shl x, N), x)
3735     Res = DAG.getNode(ISD::SUB, DL, VT,
3736                       DAG.getNode(ISD::SHL, DL, VT,
3737                                   V, DAG.getConstant(Log2_32(MulAmt+1),
3738                                                      MVT::i32)),
3739                                                      V);
3740   } else
3741     return SDValue();
3742
3743   if (ShiftAmt != 0)
3744     Res = DAG.getNode(ISD::SHL, DL, VT, Res,
3745                       DAG.getConstant(ShiftAmt, MVT::i32));
3746
3747   // Do not add new nodes to DAG combiner worklist.
3748   DCI.CombineTo(N, Res, false);
3749   return SDValue();
3750 }
3751
3752 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
3753 /// ARMISD::VMOVRRD.
3754 static SDValue PerformVMOVRRDCombine(SDNode *N,
3755                                    TargetLowering::DAGCombinerInfo &DCI) {
3756   // fmrrd(fmdrr x, y) -> x,y
3757   SDValue InDouble = N->getOperand(0);
3758   if (InDouble.getOpcode() == ARMISD::VMOVDRR)
3759     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
3760   return SDValue();
3761 }
3762
3763 /// getVShiftImm - Check if this is a valid build_vector for the immediate
3764 /// operand of a vector shift operation, where all the elements of the
3765 /// build_vector must have the same constant integer value.
3766 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
3767   // Ignore bit_converts.
3768   while (Op.getOpcode() == ISD::BIT_CONVERT)
3769     Op = Op.getOperand(0);
3770   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
3771   APInt SplatBits, SplatUndef;
3772   unsigned SplatBitSize;
3773   bool HasAnyUndefs;
3774   if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
3775                                       HasAnyUndefs, ElementBits) ||
3776       SplatBitSize > ElementBits)
3777     return false;
3778   Cnt = SplatBits.getSExtValue();
3779   return true;
3780 }
3781
3782 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
3783 /// operand of a vector shift left operation.  That value must be in the range:
3784 ///   0 <= Value < ElementBits for a left shift; or
3785 ///   0 <= Value <= ElementBits for a long left shift.
3786 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
3787   assert(VT.isVector() && "vector shift count is not a vector type");
3788   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
3789   if (! getVShiftImm(Op, ElementBits, Cnt))
3790     return false;
3791   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
3792 }
3793
3794 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
3795 /// operand of a vector shift right operation.  For a shift opcode, the value
3796 /// is positive, but for an intrinsic the value count must be negative. The
3797 /// absolute value must be in the range:
3798 ///   1 <= |Value| <= ElementBits for a right shift; or
3799 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
3800 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
3801                          int64_t &Cnt) {
3802   assert(VT.isVector() && "vector shift count is not a vector type");
3803   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
3804   if (! getVShiftImm(Op, ElementBits, Cnt))
3805     return false;
3806   if (isIntrinsic)
3807     Cnt = -Cnt;
3808   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
3809 }
3810
3811 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
3812 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
3813   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3814   switch (IntNo) {
3815   default:
3816     // Don't do anything for most intrinsics.
3817     break;
3818
3819   // Vector shifts: check for immediate versions and lower them.
3820   // Note: This is done during DAG combining instead of DAG legalizing because
3821   // the build_vectors for 64-bit vector element shift counts are generally
3822   // not legal, and it is hard to see their values after they get legalized to
3823   // loads from a constant pool.
3824   case Intrinsic::arm_neon_vshifts:
3825   case Intrinsic::arm_neon_vshiftu:
3826   case Intrinsic::arm_neon_vshiftls:
3827   case Intrinsic::arm_neon_vshiftlu:
3828   case Intrinsic::arm_neon_vshiftn:
3829   case Intrinsic::arm_neon_vrshifts:
3830   case Intrinsic::arm_neon_vrshiftu:
3831   case Intrinsic::arm_neon_vrshiftn:
3832   case Intrinsic::arm_neon_vqshifts:
3833   case Intrinsic::arm_neon_vqshiftu:
3834   case Intrinsic::arm_neon_vqshiftsu:
3835   case Intrinsic::arm_neon_vqshiftns:
3836   case Intrinsic::arm_neon_vqshiftnu:
3837   case Intrinsic::arm_neon_vqshiftnsu:
3838   case Intrinsic::arm_neon_vqrshiftns:
3839   case Intrinsic::arm_neon_vqrshiftnu:
3840   case Intrinsic::arm_neon_vqrshiftnsu: {
3841     EVT VT = N->getOperand(1).getValueType();
3842     int64_t Cnt;
3843     unsigned VShiftOpc = 0;
3844
3845     switch (IntNo) {
3846     case Intrinsic::arm_neon_vshifts:
3847     case Intrinsic::arm_neon_vshiftu:
3848       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
3849         VShiftOpc = ARMISD::VSHL;
3850         break;
3851       }
3852       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
3853         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
3854                      ARMISD::VSHRs : ARMISD::VSHRu);
3855         break;
3856       }
3857       return SDValue();
3858
3859     case Intrinsic::arm_neon_vshiftls:
3860     case Intrinsic::arm_neon_vshiftlu:
3861       if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
3862         break;
3863       llvm_unreachable("invalid shift count for vshll intrinsic");
3864
3865     case Intrinsic::arm_neon_vrshifts:
3866     case Intrinsic::arm_neon_vrshiftu:
3867       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
3868         break;
3869       return SDValue();
3870
3871     case Intrinsic::arm_neon_vqshifts:
3872     case Intrinsic::arm_neon_vqshiftu:
3873       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
3874         break;
3875       return SDValue();
3876
3877     case Intrinsic::arm_neon_vqshiftsu:
3878       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
3879         break;
3880       llvm_unreachable("invalid shift count for vqshlu intrinsic");
3881
3882     case Intrinsic::arm_neon_vshiftn:
3883     case Intrinsic::arm_neon_vrshiftn:
3884     case Intrinsic::arm_neon_vqshiftns:
3885     case Intrinsic::arm_neon_vqshiftnu:
3886     case Intrinsic::arm_neon_vqshiftnsu:
3887     case Intrinsic::arm_neon_vqrshiftns:
3888     case Intrinsic::arm_neon_vqrshiftnu:
3889     case Intrinsic::arm_neon_vqrshiftnsu:
3890       // Narrowing shifts require an immediate right shift.
3891       if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
3892         break;
3893       llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
3894
3895     default:
3896       llvm_unreachable("unhandled vector shift");
3897     }
3898
3899     switch (IntNo) {
3900     case Intrinsic::arm_neon_vshifts:
3901     case Intrinsic::arm_neon_vshiftu:
3902       // Opcode already set above.
3903       break;
3904     case Intrinsic::arm_neon_vshiftls:
3905     case Intrinsic::arm_neon_vshiftlu:
3906       if (Cnt == VT.getVectorElementType().getSizeInBits())
3907         VShiftOpc = ARMISD::VSHLLi;
3908       else
3909         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
3910                      ARMISD::VSHLLs : ARMISD::VSHLLu);
3911       break;
3912     case Intrinsic::arm_neon_vshiftn:
3913       VShiftOpc = ARMISD::VSHRN; break;
3914     case Intrinsic::arm_neon_vrshifts:
3915       VShiftOpc = ARMISD::VRSHRs; break;
3916     case Intrinsic::arm_neon_vrshiftu:
3917       VShiftOpc = ARMISD::VRSHRu; break;
3918     case Intrinsic::arm_neon_vrshiftn:
3919       VShiftOpc = ARMISD::VRSHRN; break;
3920     case Intrinsic::arm_neon_vqshifts:
3921       VShiftOpc = ARMISD::VQSHLs; break;
3922     case Intrinsic::arm_neon_vqshiftu:
3923       VShiftOpc = ARMISD::VQSHLu; break;
3924     case Intrinsic::arm_neon_vqshiftsu:
3925       VShiftOpc = ARMISD::VQSHLsu; break;
3926     case Intrinsic::arm_neon_vqshiftns:
3927       VShiftOpc = ARMISD::VQSHRNs; break;
3928     case Intrinsic::arm_neon_vqshiftnu:
3929       VShiftOpc = ARMISD::VQSHRNu; break;
3930     case Intrinsic::arm_neon_vqshiftnsu:
3931       VShiftOpc = ARMISD::VQSHRNsu; break;
3932     case Intrinsic::arm_neon_vqrshiftns:
3933       VShiftOpc = ARMISD::VQRSHRNs; break;
3934     case Intrinsic::arm_neon_vqrshiftnu:
3935       VShiftOpc = ARMISD::VQRSHRNu; break;
3936     case Intrinsic::arm_neon_vqrshiftnsu:
3937       VShiftOpc = ARMISD::VQRSHRNsu; break;
3938     }
3939
3940     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
3941                        N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
3942   }
3943
3944   case Intrinsic::arm_neon_vshiftins: {
3945     EVT VT = N->getOperand(1).getValueType();
3946     int64_t Cnt;
3947     unsigned VShiftOpc = 0;
3948
3949     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
3950       VShiftOpc = ARMISD::VSLI;
3951     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
3952       VShiftOpc = ARMISD::VSRI;
3953     else {
3954       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
3955     }
3956
3957     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
3958                        N->getOperand(1), N->getOperand(2),
3959                        DAG.getConstant(Cnt, MVT::i32));
3960   }
3961
3962   case Intrinsic::arm_neon_vqrshifts:
3963   case Intrinsic::arm_neon_vqrshiftu:
3964     // No immediate versions of these to check for.
3965     break;
3966   }
3967
3968   return SDValue();
3969 }
3970
3971 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
3972 /// lowers them.  As with the vector shift intrinsics, this is done during DAG
3973 /// combining instead of DAG legalizing because the build_vectors for 64-bit
3974 /// vector element shift counts are generally not legal, and it is hard to see
3975 /// their values after they get legalized to loads from a constant pool.
3976 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
3977                                    const ARMSubtarget *ST) {
3978   EVT VT = N->getValueType(0);
3979
3980   // Nothing to be done for scalar shifts.
3981   if (! VT.isVector())
3982     return SDValue();
3983
3984   assert(ST->hasNEON() && "unexpected vector shift");
3985   int64_t Cnt;
3986
3987   switch (N->getOpcode()) {
3988   default: llvm_unreachable("unexpected shift opcode");
3989
3990   case ISD::SHL:
3991     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
3992       return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
3993                          DAG.getConstant(Cnt, MVT::i32));
3994     break;
3995
3996   case ISD::SRA:
3997   case ISD::SRL:
3998     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
3999       unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
4000                             ARMISD::VSHRs : ARMISD::VSHRu);
4001       return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
4002                          DAG.getConstant(Cnt, MVT::i32));
4003     }
4004   }
4005   return SDValue();
4006 }
4007
4008 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
4009 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
4010 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
4011                                     const ARMSubtarget *ST) {
4012   SDValue N0 = N->getOperand(0);
4013
4014   // Check for sign- and zero-extensions of vector extract operations of 8-
4015   // and 16-bit vector elements.  NEON supports these directly.  They are
4016   // handled during DAG combining because type legalization will promote them
4017   // to 32-bit types and it is messy to recognize the operations after that.
4018   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4019     SDValue Vec = N0.getOperand(0);
4020     SDValue Lane = N0.getOperand(1);
4021     EVT VT = N->getValueType(0);
4022     EVT EltVT = N0.getValueType();
4023     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4024
4025     if (VT == MVT::i32 &&
4026         (EltVT == MVT::i8 || EltVT == MVT::i16) &&
4027         TLI.isTypeLegal(Vec.getValueType())) {
4028
4029       unsigned Opc = 0;
4030       switch (N->getOpcode()) {
4031       default: llvm_unreachable("unexpected opcode");
4032       case ISD::SIGN_EXTEND:
4033         Opc = ARMISD::VGETLANEs;
4034         break;
4035       case ISD::ZERO_EXTEND:
4036       case ISD::ANY_EXTEND:
4037         Opc = ARMISD::VGETLANEu;
4038         break;
4039       }
4040       return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
4041     }
4042   }
4043
4044   return SDValue();
4045 }
4046
4047 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
4048 /// to match f32 max/min patterns to use NEON vmax/vmin instructions.
4049 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
4050                                        const ARMSubtarget *ST) {
4051   // If the target supports NEON, try to use vmax/vmin instructions for f32
4052   // selects like "x < y ? x : y".  Unless the FiniteOnlyFPMath option is set,
4053   // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
4054   // a NaN; only do the transformation when it matches that behavior.
4055
4056   // For now only do this when using NEON for FP operations; if using VFP, it
4057   // is not obvious that the benefit outweighs the cost of switching to the
4058   // NEON pipeline.
4059   if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
4060       N->getValueType(0) != MVT::f32)
4061     return SDValue();
4062
4063   SDValue CondLHS = N->getOperand(0);
4064   SDValue CondRHS = N->getOperand(1);
4065   SDValue LHS = N->getOperand(2);
4066   SDValue RHS = N->getOperand(3);
4067   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4068
4069   unsigned Opcode = 0;
4070   bool IsReversed;
4071   if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
4072     IsReversed = false; // x CC y ? x : y
4073   } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
4074     IsReversed = true ; // x CC y ? y : x
4075   } else {
4076     return SDValue();
4077   }
4078
4079   bool IsUnordered;
4080   switch (CC) {
4081   default: break;
4082   case ISD::SETOLT:
4083   case ISD::SETOLE:
4084   case ISD::SETLT:
4085   case ISD::SETLE:
4086   case ISD::SETULT:
4087   case ISD::SETULE:
4088     // If LHS is NaN, an ordered comparison will be false and the result will
4089     // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
4090     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4091     IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4092     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4093       break;
4094     // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4095     // will return -0, so vmin can only be used for unsafe math or if one of
4096     // the operands is known to be nonzero.
4097     if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4098         !UnsafeFPMath &&
4099         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4100       break;
4101     Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4102     break;
4103
4104   case ISD::SETOGT:
4105   case ISD::SETOGE:
4106   case ISD::SETGT:
4107   case ISD::SETGE:
4108   case ISD::SETUGT:
4109   case ISD::SETUGE:
4110     // If LHS is NaN, an ordered comparison will be false and the result will
4111     // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
4112     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4113     IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4114     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4115       break;
4116     // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4117     // will return +0, so vmax can only be used for unsafe math or if one of
4118     // the operands is known to be nonzero.
4119     if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4120         !UnsafeFPMath &&
4121         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4122       break;
4123     Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4124     break;
4125   }
4126
4127   if (!Opcode)
4128     return SDValue();
4129   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
4130 }
4131
4132 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
4133                                              DAGCombinerInfo &DCI) const {
4134   switch (N->getOpcode()) {
4135   default: break;
4136   case ISD::ADD:        return PerformADDCombine(N, DCI);
4137   case ISD::SUB:        return PerformSUBCombine(N, DCI);
4138   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
4139   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
4140   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
4141   case ISD::SHL:
4142   case ISD::SRA:
4143   case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
4144   case ISD::SIGN_EXTEND:
4145   case ISD::ZERO_EXTEND:
4146   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
4147   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
4148   }
4149   return SDValue();
4150 }
4151
4152 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
4153   if (!Subtarget->hasV6Ops())
4154     // Pre-v6 does not support unaligned mem access.
4155     return false;
4156   else {
4157     // v6+ may or may not support unaligned mem access depending on the system
4158     // configuration.
4159     // FIXME: This is pretty conservative. Should we provide cmdline option to
4160     // control the behaviour?
4161     if (!Subtarget->isTargetDarwin())
4162       return false;
4163   }
4164
4165   switch (VT.getSimpleVT().SimpleTy) {
4166   default:
4167     return false;
4168   case MVT::i8:
4169   case MVT::i16:
4170   case MVT::i32:
4171     return true;
4172   // FIXME: VLD1 etc with standard alignment is legal.
4173   }
4174 }
4175
4176 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
4177   if (V < 0)
4178     return false;
4179
4180   unsigned Scale = 1;
4181   switch (VT.getSimpleVT().SimpleTy) {
4182   default: return false;
4183   case MVT::i1:
4184   case MVT::i8:
4185     // Scale == 1;
4186     break;
4187   case MVT::i16:
4188     // Scale == 2;
4189     Scale = 2;
4190     break;
4191   case MVT::i32:
4192     // Scale == 4;
4193     Scale = 4;
4194     break;
4195   }
4196
4197   if ((V & (Scale - 1)) != 0)
4198     return false;
4199   V /= Scale;
4200   return V == (V & ((1LL << 5) - 1));
4201 }
4202
4203 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
4204                                       const ARMSubtarget *Subtarget) {
4205   bool isNeg = false;
4206   if (V < 0) {
4207     isNeg = true;
4208     V = - V;
4209   }
4210
4211   switch (VT.getSimpleVT().SimpleTy) {
4212   default: return false;
4213   case MVT::i1:
4214   case MVT::i8:
4215   case MVT::i16:
4216   case MVT::i32:
4217     // + imm12 or - imm8
4218     if (isNeg)
4219       return V == (V & ((1LL << 8) - 1));
4220     return V == (V & ((1LL << 12) - 1));
4221   case MVT::f32:
4222   case MVT::f64:
4223     // Same as ARM mode. FIXME: NEON?
4224     if (!Subtarget->hasVFP2())
4225       return false;
4226     if ((V & 3) != 0)
4227       return false;
4228     V >>= 2;
4229     return V == (V & ((1LL << 8) - 1));
4230   }
4231 }
4232
4233 /// isLegalAddressImmediate - Return true if the integer value can be used
4234 /// as the offset of the target addressing mode for load / store of the
4235 /// given type.
4236 static bool isLegalAddressImmediate(int64_t V, EVT VT,
4237                                     const ARMSubtarget *Subtarget) {
4238   if (V == 0)
4239     return true;
4240
4241   if (!VT.isSimple())
4242     return false;
4243
4244   if (Subtarget->isThumb1Only())
4245     return isLegalT1AddressImmediate(V, VT);
4246   else if (Subtarget->isThumb2())
4247     return isLegalT2AddressImmediate(V, VT, Subtarget);
4248
4249   // ARM mode.
4250   if (V < 0)
4251     V = - V;
4252   switch (VT.getSimpleVT().SimpleTy) {
4253   default: return false;
4254   case MVT::i1:
4255   case MVT::i8:
4256   case MVT::i32:
4257     // +- imm12
4258     return V == (V & ((1LL << 12) - 1));
4259   case MVT::i16:
4260     // +- imm8
4261     return V == (V & ((1LL << 8) - 1));
4262   case MVT::f32:
4263   case MVT::f64:
4264     if (!Subtarget->hasVFP2()) // FIXME: NEON?
4265       return false;
4266     if ((V & 3) != 0)
4267       return false;
4268     V >>= 2;
4269     return V == (V & ((1LL << 8) - 1));
4270   }
4271 }
4272
4273 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
4274                                                       EVT VT) const {
4275   int Scale = AM.Scale;
4276   if (Scale < 0)
4277     return false;
4278
4279   switch (VT.getSimpleVT().SimpleTy) {
4280   default: return false;
4281   case MVT::i1:
4282   case MVT::i8:
4283   case MVT::i16:
4284   case MVT::i32:
4285     if (Scale == 1)
4286       return true;
4287     // r + r << imm
4288     Scale = Scale & ~1;
4289     return Scale == 2 || Scale == 4 || Scale == 8;
4290   case MVT::i64:
4291     // r + r
4292     if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4293       return true;
4294     return false;
4295   case MVT::isVoid:
4296     // Note, we allow "void" uses (basically, uses that aren't loads or
4297     // stores), because arm allows folding a scale into many arithmetic
4298     // operations.  This should be made more precise and revisited later.
4299
4300     // Allow r << imm, but the imm has to be a multiple of two.
4301     if (Scale & 1) return false;
4302     return isPowerOf2_32(Scale);
4303   }
4304 }
4305
4306 /// isLegalAddressingMode - Return true if the addressing mode represented
4307 /// by AM is legal for this target, for a load/store of the specified type.
4308 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
4309                                               const Type *Ty) const {
4310   EVT VT = getValueType(Ty, true);
4311   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
4312     return false;
4313
4314   // Can never fold addr of global into load/store.
4315   if (AM.BaseGV)
4316     return false;
4317
4318   switch (AM.Scale) {
4319   case 0:  // no scale reg, must be "r+i" or "r", or "i".
4320     break;
4321   case 1:
4322     if (Subtarget->isThumb1Only())
4323       return false;
4324     // FALL THROUGH.
4325   default:
4326     // ARM doesn't support any R+R*scale+imm addr modes.
4327     if (AM.BaseOffs)
4328       return false;
4329
4330     if (!VT.isSimple())
4331       return false;
4332
4333     if (Subtarget->isThumb2())
4334       return isLegalT2ScaledAddressingMode(AM, VT);
4335
4336     int Scale = AM.Scale;
4337     switch (VT.getSimpleVT().SimpleTy) {
4338     default: return false;
4339     case MVT::i1:
4340     case MVT::i8:
4341     case MVT::i32:
4342       if (Scale < 0) Scale = -Scale;
4343       if (Scale == 1)
4344         return true;
4345       // r + r << imm
4346       return isPowerOf2_32(Scale & ~1);
4347     case MVT::i16:
4348     case MVT::i64:
4349       // r + r
4350       if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4351         return true;
4352       return false;
4353
4354     case MVT::isVoid:
4355       // Note, we allow "void" uses (basically, uses that aren't loads or
4356       // stores), because arm allows folding a scale into many arithmetic
4357       // operations.  This should be made more precise and revisited later.
4358
4359       // Allow r << imm, but the imm has to be a multiple of two.
4360       if (Scale & 1) return false;
4361       return isPowerOf2_32(Scale);
4362     }
4363     break;
4364   }
4365   return true;
4366 }
4367
4368 /// isLegalICmpImmediate - Return true if the specified immediate is legal
4369 /// icmp immediate, that is the target has icmp instructions which can compare
4370 /// a register against the immediate without having to materialize the
4371 /// immediate into a register.
4372 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4373   if (!Subtarget->isThumb())
4374     return ARM_AM::getSOImmVal(Imm) != -1;
4375   if (Subtarget->isThumb2())
4376     return ARM_AM::getT2SOImmVal(Imm) != -1;
4377   return Imm >= 0 && Imm <= 255;
4378 }
4379
4380 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
4381                                       bool isSEXTLoad, SDValue &Base,
4382                                       SDValue &Offset, bool &isInc,
4383                                       SelectionDAG &DAG) {
4384   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4385     return false;
4386
4387   if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
4388     // AddressingMode 3
4389     Base = Ptr->getOperand(0);
4390     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4391       int RHSC = (int)RHS->getZExtValue();
4392       if (RHSC < 0 && RHSC > -256) {
4393         assert(Ptr->getOpcode() == ISD::ADD);
4394         isInc = false;
4395         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4396         return true;
4397       }
4398     }
4399     isInc = (Ptr->getOpcode() == ISD::ADD);
4400     Offset = Ptr->getOperand(1);
4401     return true;
4402   } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
4403     // AddressingMode 2
4404     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4405       int RHSC = (int)RHS->getZExtValue();
4406       if (RHSC < 0 && RHSC > -0x1000) {
4407         assert(Ptr->getOpcode() == ISD::ADD);
4408         isInc = false;
4409         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4410         Base = Ptr->getOperand(0);
4411         return true;
4412       }
4413     }
4414
4415     if (Ptr->getOpcode() == ISD::ADD) {
4416       isInc = true;
4417       ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
4418       if (ShOpcVal != ARM_AM::no_shift) {
4419         Base = Ptr->getOperand(1);
4420         Offset = Ptr->getOperand(0);
4421       } else {
4422         Base = Ptr->getOperand(0);
4423         Offset = Ptr->getOperand(1);
4424       }
4425       return true;
4426     }
4427
4428     isInc = (Ptr->getOpcode() == ISD::ADD);
4429     Base = Ptr->getOperand(0);
4430     Offset = Ptr->getOperand(1);
4431     return true;
4432   }
4433
4434   // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
4435   return false;
4436 }
4437
4438 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
4439                                      bool isSEXTLoad, SDValue &Base,
4440                                      SDValue &Offset, bool &isInc,
4441                                      SelectionDAG &DAG) {
4442   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4443     return false;
4444
4445   Base = Ptr->getOperand(0);
4446   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4447     int RHSC = (int)RHS->getZExtValue();
4448     if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
4449       assert(Ptr->getOpcode() == ISD::ADD);
4450       isInc = false;
4451       Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4452       return true;
4453     } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
4454       isInc = Ptr->getOpcode() == ISD::ADD;
4455       Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
4456       return true;
4457     }
4458   }
4459
4460   return false;
4461 }
4462
4463 /// getPreIndexedAddressParts - returns true by value, base pointer and
4464 /// offset pointer and addressing mode by reference if the node's address
4465 /// can be legally represented as pre-indexed load / store address.
4466 bool
4467 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
4468                                              SDValue &Offset,
4469                                              ISD::MemIndexedMode &AM,
4470                                              SelectionDAG &DAG) const {
4471   if (Subtarget->isThumb1Only())
4472     return false;
4473
4474   EVT VT;
4475   SDValue Ptr;
4476   bool isSEXTLoad = false;
4477   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4478     Ptr = LD->getBasePtr();
4479     VT  = LD->getMemoryVT();
4480     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4481   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4482     Ptr = ST->getBasePtr();
4483     VT  = ST->getMemoryVT();
4484   } else
4485     return false;
4486
4487   bool isInc;
4488   bool isLegal = false;
4489   if (Subtarget->isThumb2())
4490     isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4491                                        Offset, isInc, DAG);
4492   else
4493     isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4494                                         Offset, isInc, DAG);
4495   if (!isLegal)
4496     return false;
4497
4498   AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
4499   return true;
4500 }
4501
4502 /// getPostIndexedAddressParts - returns true by value, base pointer and
4503 /// offset pointer and addressing mode by reference if this node can be
4504 /// combined with a load / store to form a post-indexed load / store.
4505 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
4506                                                    SDValue &Base,
4507                                                    SDValue &Offset,
4508                                                    ISD::MemIndexedMode &AM,
4509                                                    SelectionDAG &DAG) const {
4510   if (Subtarget->isThumb1Only())
4511     return false;
4512
4513   EVT VT;
4514   SDValue Ptr;
4515   bool isSEXTLoad = false;
4516   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4517     VT  = LD->getMemoryVT();
4518     Ptr = LD->getBasePtr();
4519     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4520   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4521     VT  = ST->getMemoryVT();
4522     Ptr = ST->getBasePtr();
4523   } else
4524     return false;
4525
4526   bool isInc;
4527   bool isLegal = false;
4528   if (Subtarget->isThumb2())
4529     isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4530                                        isInc, DAG);
4531   else
4532     isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4533                                         isInc, DAG);
4534   if (!isLegal)
4535     return false;
4536
4537   if (Ptr != Base) {
4538     // Swap base ptr and offset to catch more post-index load / store when
4539     // it's legal. In Thumb2 mode, offset must be an immediate.
4540     if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
4541         !Subtarget->isThumb2())
4542       std::swap(Base, Offset);
4543
4544     // Post-indexed load / store update the base pointer.
4545     if (Ptr != Base)
4546       return false;
4547   }
4548
4549   AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
4550   return true;
4551 }
4552
4553 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
4554                                                        const APInt &Mask,
4555                                                        APInt &KnownZero,
4556                                                        APInt &KnownOne,
4557                                                        const SelectionDAG &DAG,
4558                                                        unsigned Depth) const {
4559   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
4560   switch (Op.getOpcode()) {
4561   default: break;
4562   case ARMISD::CMOV: {
4563     // Bits are known zero/one if known on the LHS and RHS.
4564     DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
4565     if (KnownZero == 0 && KnownOne == 0) return;
4566
4567     APInt KnownZeroRHS, KnownOneRHS;
4568     DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
4569                           KnownZeroRHS, KnownOneRHS, Depth+1);
4570     KnownZero &= KnownZeroRHS;
4571     KnownOne  &= KnownOneRHS;
4572     return;
4573   }
4574   }
4575 }
4576
4577 //===----------------------------------------------------------------------===//
4578 //                           ARM Inline Assembly Support
4579 //===----------------------------------------------------------------------===//
4580
4581 /// getConstraintType - Given a constraint letter, return the type of
4582 /// constraint it is for this target.
4583 ARMTargetLowering::ConstraintType
4584 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
4585   if (Constraint.size() == 1) {
4586     switch (Constraint[0]) {
4587     default:  break;
4588     case 'l': return C_RegisterClass;
4589     case 'w': return C_RegisterClass;
4590     }
4591   }
4592   return TargetLowering::getConstraintType(Constraint);
4593 }
4594
4595 std::pair<unsigned, const TargetRegisterClass*>
4596 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4597                                                 EVT VT) const {
4598   if (Constraint.size() == 1) {
4599     // GCC ARM Constraint Letters
4600     switch (Constraint[0]) {
4601     case 'l':
4602       if (Subtarget->isThumb())
4603         return std::make_pair(0U, ARM::tGPRRegisterClass);
4604       else
4605         return std::make_pair(0U, ARM::GPRRegisterClass);
4606     case 'r':
4607       return std::make_pair(0U, ARM::GPRRegisterClass);
4608     case 'w':
4609       if (VT == MVT::f32)
4610         return std::make_pair(0U, ARM::SPRRegisterClass);
4611       if (VT.getSizeInBits() == 64)
4612         return std::make_pair(0U, ARM::DPRRegisterClass);
4613       if (VT.getSizeInBits() == 128)
4614         return std::make_pair(0U, ARM::QPRRegisterClass);
4615       break;
4616     }
4617   }
4618   if (StringRef("{cc}").equals_lower(Constraint))
4619     return std::make_pair(0U, ARM::CCRRegisterClass);
4620
4621   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4622 }
4623
4624 std::vector<unsigned> ARMTargetLowering::
4625 getRegClassForInlineAsmConstraint(const std::string &Constraint,
4626                                   EVT VT) const {
4627   if (Constraint.size() != 1)
4628     return std::vector<unsigned>();
4629
4630   switch (Constraint[0]) {      // GCC ARM Constraint Letters
4631   default: break;
4632   case 'l':
4633     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4634                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4635                                  0);
4636   case 'r':
4637     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4638                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4639                                  ARM::R8, ARM::R9, ARM::R10, ARM::R11,
4640                                  ARM::R12, ARM::LR, 0);
4641   case 'w':
4642     if (VT == MVT::f32)
4643       return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
4644                                    ARM::S4, ARM::S5, ARM::S6, ARM::S7,
4645                                    ARM::S8, ARM::S9, ARM::S10, ARM::S11,
4646                                    ARM::S12,ARM::S13,ARM::S14,ARM::S15,
4647                                    ARM::S16,ARM::S17,ARM::S18,ARM::S19,
4648                                    ARM::S20,ARM::S21,ARM::S22,ARM::S23,
4649                                    ARM::S24,ARM::S25,ARM::S26,ARM::S27,
4650                                    ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
4651     if (VT.getSizeInBits() == 64)
4652       return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
4653                                    ARM::D4, ARM::D5, ARM::D6, ARM::D7,
4654                                    ARM::D8, ARM::D9, ARM::D10,ARM::D11,
4655                                    ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
4656     if (VT.getSizeInBits() == 128)
4657       return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
4658                                    ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
4659       break;
4660   }
4661
4662   return std::vector<unsigned>();
4663 }
4664
4665 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
4666 /// vector.  If it is invalid, don't add anything to Ops.
4667 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4668                                                      char Constraint,
4669                                                      bool hasMemory,
4670                                                      std::vector<SDValue>&Ops,
4671                                                      SelectionDAG &DAG) const {
4672   SDValue Result(0, 0);
4673
4674   switch (Constraint) {
4675   default: break;
4676   case 'I': case 'J': case 'K': case 'L':
4677   case 'M': case 'N': case 'O':
4678     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4679     if (!C)
4680       return;
4681
4682     int64_t CVal64 = C->getSExtValue();
4683     int CVal = (int) CVal64;
4684     // None of these constraints allow values larger than 32 bits.  Check
4685     // that the value fits in an int.
4686     if (CVal != CVal64)
4687       return;
4688
4689     switch (Constraint) {
4690       case 'I':
4691         if (Subtarget->isThumb1Only()) {
4692           // This must be a constant between 0 and 255, for ADD
4693           // immediates.
4694           if (CVal >= 0 && CVal <= 255)
4695             break;
4696         } else if (Subtarget->isThumb2()) {
4697           // A constant that can be used as an immediate value in a
4698           // data-processing instruction.
4699           if (ARM_AM::getT2SOImmVal(CVal) != -1)
4700             break;
4701         } else {
4702           // A constant that can be used as an immediate value in a
4703           // data-processing instruction.
4704           if (ARM_AM::getSOImmVal(CVal) != -1)
4705             break;
4706         }
4707         return;
4708
4709       case 'J':
4710         if (Subtarget->isThumb()) {  // FIXME thumb2
4711           // This must be a constant between -255 and -1, for negated ADD
4712           // immediates. This can be used in GCC with an "n" modifier that
4713           // prints the negated value, for use with SUB instructions. It is
4714           // not useful otherwise but is implemented for compatibility.
4715           if (CVal >= -255 && CVal <= -1)
4716             break;
4717         } else {
4718           // This must be a constant between -4095 and 4095. It is not clear
4719           // what this constraint is intended for. Implemented for
4720           // compatibility with GCC.
4721           if (CVal >= -4095 && CVal <= 4095)
4722             break;
4723         }
4724         return;
4725
4726       case 'K':
4727         if (Subtarget->isThumb1Only()) {
4728           // A 32-bit value where only one byte has a nonzero value. Exclude
4729           // zero to match GCC. This constraint is used by GCC internally for
4730           // constants that can be loaded with a move/shift combination.
4731           // It is not useful otherwise but is implemented for compatibility.
4732           if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
4733             break;
4734         } else if (Subtarget->isThumb2()) {
4735           // A constant whose bitwise inverse can be used as an immediate
4736           // value in a data-processing instruction. This can be used in GCC
4737           // with a "B" modifier that prints the inverted value, for use with
4738           // BIC and MVN instructions. It is not useful otherwise but is
4739           // implemented for compatibility.
4740           if (ARM_AM::getT2SOImmVal(~CVal) != -1)
4741             break;
4742         } else {
4743           // A constant whose bitwise inverse can be used as an immediate
4744           // value in a data-processing instruction. This can be used in GCC
4745           // with a "B" modifier that prints the inverted value, for use with
4746           // BIC and MVN instructions. It is not useful otherwise but is
4747           // implemented for compatibility.
4748           if (ARM_AM::getSOImmVal(~CVal) != -1)
4749             break;
4750         }
4751         return;
4752
4753       case 'L':
4754         if (Subtarget->isThumb1Only()) {
4755           // This must be a constant between -7 and 7,
4756           // for 3-operand ADD/SUB immediate instructions.
4757           if (CVal >= -7 && CVal < 7)
4758             break;
4759         } else if (Subtarget->isThumb2()) {
4760           // A constant whose negation can be used as an immediate value in a
4761           // data-processing instruction. This can be used in GCC with an "n"
4762           // modifier that prints the negated value, for use with SUB
4763           // instructions. It is not useful otherwise but is implemented for
4764           // compatibility.
4765           if (ARM_AM::getT2SOImmVal(-CVal) != -1)
4766             break;
4767         } else {
4768           // A constant whose negation can be used as an immediate value in a
4769           // data-processing instruction. This can be used in GCC with an "n"
4770           // modifier that prints the negated value, for use with SUB
4771           // instructions. It is not useful otherwise but is implemented for
4772           // compatibility.
4773           if (ARM_AM::getSOImmVal(-CVal) != -1)
4774             break;
4775         }
4776         return;
4777
4778       case 'M':
4779         if (Subtarget->isThumb()) { // FIXME thumb2
4780           // This must be a multiple of 4 between 0 and 1020, for
4781           // ADD sp + immediate.
4782           if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
4783             break;
4784         } else {
4785           // A power of two or a constant between 0 and 32.  This is used in
4786           // GCC for the shift amount on shifted register operands, but it is
4787           // useful in general for any shift amounts.
4788           if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
4789             break;
4790         }
4791         return;
4792
4793       case 'N':
4794         if (Subtarget->isThumb()) {  // FIXME thumb2
4795           // This must be a constant between 0 and 31, for shift amounts.
4796           if (CVal >= 0 && CVal <= 31)
4797             break;
4798         }
4799         return;
4800
4801       case 'O':
4802         if (Subtarget->isThumb()) {  // FIXME thumb2
4803           // This must be a multiple of 4 between -508 and 508, for
4804           // ADD/SUB sp = sp + immediate.
4805           if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
4806             break;
4807         }
4808         return;
4809     }
4810     Result = DAG.getTargetConstant(CVal, Op.getValueType());
4811     break;
4812   }
4813
4814   if (Result.getNode()) {
4815     Ops.push_back(Result);
4816     return;
4817   }
4818   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
4819                                                       Ops, DAG);
4820 }
4821
4822 bool
4823 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
4824   // The ARM target isn't yet aware of offsets.
4825   return false;
4826 }
4827
4828 int ARM::getVFPf32Imm(const APFloat &FPImm) {
4829   APInt Imm = FPImm.bitcastToAPInt();
4830   uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
4831   int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
4832   int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
4833
4834   // We can handle 4 bits of mantissa.
4835   // mantissa = (16+UInt(e:f:g:h))/16.
4836   if (Mantissa & 0x7ffff)
4837     return -1;
4838   Mantissa >>= 19;
4839   if ((Mantissa & 0xf) != Mantissa)
4840     return -1;
4841
4842   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
4843   if (Exp < -3 || Exp > 4)
4844     return -1;
4845   Exp = ((Exp+3) & 0x7) ^ 4;
4846
4847   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
4848 }
4849
4850 int ARM::getVFPf64Imm(const APFloat &FPImm) {
4851   APInt Imm = FPImm.bitcastToAPInt();
4852   uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
4853   int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
4854   uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
4855
4856   // We can handle 4 bits of mantissa.
4857   // mantissa = (16+UInt(e:f:g:h))/16.
4858   if (Mantissa & 0xffffffffffffLL)
4859     return -1;
4860   Mantissa >>= 48;
4861   if ((Mantissa & 0xf) != Mantissa)
4862     return -1;
4863
4864   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
4865   if (Exp < -3 || Exp > 4)
4866     return -1;
4867   Exp = ((Exp+3) & 0x7) ^ 4;
4868
4869   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
4870 }
4871
4872 /// isFPImmLegal - Returns true if the target can instruction select the
4873 /// specified FP immediate natively. If false, the legalizer will
4874 /// materialize the FP immediate as a load from a constant pool.
4875 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4876   if (!Subtarget->hasVFP3())
4877     return false;
4878   if (VT == MVT::f32)
4879     return ARM::getVFPf32Imm(Imm) != -1;
4880   if (VT == MVT::f64)
4881     return ARM::getVFPf64Imm(Imm) != -1;
4882   return false;
4883 }