lib/Target/ARM/ARMISelLowering.cpp

   1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that ARM uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "arm-isel"
  16 #include "ARM.h"
  17 #include "ARMAddressingModes.h"
  18 #include "ARMCallingConv.h"
  19 #include "ARMConstantPoolValue.h"
  20 #include "ARMISelLowering.h"
  21 #include "ARMMachineFunctionInfo.h"
  22 #include "ARMPerfectShuffle.h"
  23 #include "ARMRegisterInfo.h"
  24 #include "ARMSubtarget.h"
  25 #include "ARMTargetMachine.h"
  26 #include "ARMTargetObjectFile.h"
  27 #include "llvm/CallingConv.h"
  28 #include "llvm/Constants.h"
  29 #include "llvm/Function.h"
  30 #include "llvm/GlobalValue.h"
  31 #include "llvm/Instruction.h"
  32 #include "llvm/Instructions.h"
  33 #include "llvm/Intrinsics.h"
  34 #include "llvm/Type.h"
  35 #include "llvm/CodeGen/CallingConvLower.h"
  36 #include "llvm/CodeGen/MachineBasicBlock.h"
  37 #include "llvm/CodeGen/MachineFrameInfo.h"
  38 #include "llvm/CodeGen/MachineFunction.h"
  39 #include "llvm/CodeGen/MachineInstrBuilder.h"
  40 #include "llvm/CodeGen/MachineRegisterInfo.h"
  41 #include "llvm/CodeGen/PseudoSourceValue.h"
  42 #include "llvm/CodeGen/SelectionDAG.h"
  43 #include "llvm/MC/MCSectionMachO.h"
  44 #include "llvm/Target/TargetOptions.h"
  45 #include "llvm/ADT/VectorExtras.h"
  46 #include "llvm/ADT/Statistic.h"
  47 #include "llvm/Support/CommandLine.h"
  48 #include "llvm/Support/ErrorHandling.h"
  49 #include "llvm/Support/MathExtras.h"
  50 #include "llvm/Support/raw_ostream.h"
  51 #include <sstream>
  52 using namespace llvm;
  53
  54 STATISTIC(NumTailCalls, "Number of tail calls");
  55
  56 // This option should go away when tail calls fully work.
  57 static cl::opt<bool>
  58 EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  59   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  60   cl::init(false));
  61
  62 static cl::opt<bool>
  63 EnableARMLongCalls("arm-long-calls", cl::Hidden,
  64   cl::desc("Generate calls via indirect call instructions"),
  65   cl::init(false));
  66
  67 static cl::opt<bool>
  68 ARMInterworking("arm-interworking", cl::Hidden,
  69   cl::desc("Enable / disable ARM interworking (for debugging only)"),
  70   cl::init(true));
  71
  72 void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
  73                                        EVT PromotedBitwiseVT) {
  74   if (VT != PromotedLdStVT) {
  75     setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
  76     AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
  77                        PromotedLdStVT.getSimpleVT());
  78
  79     setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
  80     AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
  81                        PromotedLdStVT.getSimpleVT());
  82   }
  83
  84   EVT ElemTy = VT.getVectorElementType();
  85   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
  86     setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  87   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  88   if (ElemTy != MVT::i32) {
  89     setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
  90     setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
  91     setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
  92     setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  93   }
  94   setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  95   setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  96   setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  97   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  98   setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  99   setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
 100   if (VT.isInteger()) {
 101     setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
 102     setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
 103     setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
 104     setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
 105     setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
 106     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 107          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
 108       setTruncStoreAction(VT.getSimpleVT(),
 109                           (MVT::SimpleValueType)InnerVT, Expand);
 110   }
 111   setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
 112
 113   // Promote all bit-wise operations.
 114   if (VT.isInteger() && VT != PromotedBitwiseVT) {
 115     setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
 116     AddPromotedToType (ISD::AND, VT.getSimpleVT(),
 117                        PromotedBitwiseVT.getSimpleVT());
 118     setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
 119     AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
 120                        PromotedBitwiseVT.getSimpleVT());
 121     setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
 122     AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
 123                        PromotedBitwiseVT.getSimpleVT());
 124   }
 125
 126   // Neon does not support vector divide/remainder operations.
 127   setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
 128   setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
 129   setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
 130   setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
 131   setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
 132   setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
 133 }
 134
 135 void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
 136   addRegisterClass(VT, ARM::DPRRegisterClass);
 137   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
 138 }
 139
 140 void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
 141   addRegisterClass(VT, ARM::QPRRegisterClass);
 142   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 143 }
 144
 145 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
 146   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
 147     return new TargetLoweringObjectFileMachO();
 148
 149   return new ARMElfTargetObjectFile();
 150 }
 151
 152 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 153     : TargetLowering(TM, createTLOF(TM)) {
 154   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 155   RegInfo = TM.getRegisterInfo();
 156   Itins = TM.getInstrItineraryData();
 157
 158   if (Subtarget->isTargetDarwin()) {
 159     // Uses VFP for Thumb libfuncs if available.
 160     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
 161       // Single-precision floating-point arithmetic.
 162       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
 163       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
 164       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
 165       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
 166
 167       // Double-precision floating-point arithmetic.
 168       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
 169       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
 170       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
 171       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
 172
 173       // Single-precision comparisons.
 174       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
 175       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
 176       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
 177       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
 178       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
 179       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
 180       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
 181       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
 182
 183       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
 184       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
 185       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
 186       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
 187       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
 188       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
 189       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
 190       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
 191
 192       // Double-precision comparisons.
 193       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
 194       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
 195       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
 196       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
 197       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
 198       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
 199       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
 200       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
 201
 202       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
 203       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
 204       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
 205       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
 206       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
 207       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
 208       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
 209       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
 210
 211       // Floating-point to integer conversions.
 212       // i64 conversions are done via library routines even when generating VFP
 213       // instructions, so use the same ones.
 214       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
 215       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
 216       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
 217       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
 218
 219       // Conversions between floating types.
 220       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
 221       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
 222
 223       // Integer to floating-point conversions.
 224       // i64 conversions are done via library routines even when generating VFP
 225       // instructions, so use the same ones.
 226       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
 227       // e.g., __floatunsidf vs. __floatunssidfvfp.
 228       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
 229       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
 230       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
 231       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
 232     }
 233   }
 234
 235   // These libcalls are not available in 32-bit.
 236   setLibcallName(RTLIB::SHL_I128, 0);
 237   setLibcallName(RTLIB::SRL_I128, 0);
 238   setLibcallName(RTLIB::SRA_I128, 0);
 239
 240   if (Subtarget->isAAPCS_ABI()) {
 241     // Double-precision floating-point arithmetic helper functions
 242     // RTABI chapter 4.1.2, Table 2
 243     setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
 244     setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
 245     setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
 246     setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
 247     setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
 248     setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
 249     setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
 250     setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
 251
 252     // Double-precision floating-point comparison helper functions
 253     // RTABI chapter 4.1.2, Table 3
 254     setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
 255     setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
 256     setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
 257     setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
 258     setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
 259     setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
 260     setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
 261     setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
 262     setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
 263     setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
 264     setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
 265     setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
 266     setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
 267     setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
 268     setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
 269     setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
 270     setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
 271     setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
 272     setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
 273     setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
 274     setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
 275     setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
 276     setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
 277     setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
 278
 279     // Single-precision floating-point arithmetic helper functions
 280     // RTABI chapter 4.1.2, Table 4
 281     setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
 282     setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
 283     setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
 284     setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
 285     setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
 286     setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
 287     setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
 288     setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
 289
 290     // Single-precision floating-point comparison helper functions
 291     // RTABI chapter 4.1.2, Table 5
 292     setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
 293     setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
 294     setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
 295     setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
 296     setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
 297     setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
 298     setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
 299     setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
 300     setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
 301     setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
 302     setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
 303     setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
 304     setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
 305     setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
 306     setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
 307     setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
 308     setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
 309     setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
 310     setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
 311     setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
 312     setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
 313     setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
 314     setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
 315     setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
 316
 317     // Floating-point to integer conversions.
 318     // RTABI chapter 4.1.2, Table 6
 319     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
 320     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
 321     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
 322     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
 323     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
 324     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
 325     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
 326     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
 327     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
 328     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
 329     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
 330     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
 331     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
 332     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
 333     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
 334     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
 335
 336     // Conversions between floating types.
 337     // RTABI chapter 4.1.2, Table 7
 338     setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
 339     setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
 340     setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
 341     setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
 342
 343     // Integer to floating-point conversions.
 344     // RTABI chapter 4.1.2, Table 8
 345     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
 346     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
 347     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
 348     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
 349     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
 350     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
 351     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
 352     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
 353     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
 354     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
 355     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
 356     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
 357     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
 358     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
 359     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
 360     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
 361
 362     // Long long helper functions
 363     // RTABI chapter 4.2, Table 9
 364     setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
 365     setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
 366     setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
 367     setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
 368     setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
 369     setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
 370     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
 371     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
 372     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
 373     setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
 374     setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
 375     setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
 376
 377     // Integer division functions
 378     // RTABI chapter 4.3.1
 379     setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
 380     setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
 381     setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
 382     setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
 383     setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
 384     setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
 385     setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
 386     setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
 387     setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
 388     setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
 389     setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
 390     setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
 391   }
 392
 393   if (Subtarget->isThumb1Only())
 394     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
 395   else
 396     addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
 397   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 398     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
 399     if (!Subtarget->isFPOnlySP())
 400       addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
 401
 402     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 403   }
 404
 405   if (Subtarget->hasNEON()) {
 406     addDRTypeForNEON(MVT::v2f32);
 407     addDRTypeForNEON(MVT::v8i8);
 408     addDRTypeForNEON(MVT::v4i16);
 409     addDRTypeForNEON(MVT::v2i32);
 410     addDRTypeForNEON(MVT::v1i64);
 411
 412     addQRTypeForNEON(MVT::v4f32);
 413     addQRTypeForNEON(MVT::v2f64);
 414     addQRTypeForNEON(MVT::v16i8);
 415     addQRTypeForNEON(MVT::v8i16);
 416     addQRTypeForNEON(MVT::v4i32);
 417     addQRTypeForNEON(MVT::v2i64);
 418
 419     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
 420     // neither Neon nor VFP support any arithmetic operations on it.
 421     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 422     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 423     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 424     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
 425     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
 426     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
 427     setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
 428     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
 429     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 430     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
 431     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
 432     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
 433     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
 434     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
 435     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
 436     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
 437     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
 438     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
 439     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
 440     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
 441     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
 442     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
 443     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
 444     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
 445
 446     setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
 447
 448     // Neon does not support some operations on v1i64 and v2i64 types.
 449     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
 450     // Custom handling for some quad-vector types to detect VMULL.
 451     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
 452     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 453     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
 454     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
 455     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
 456
 457     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 458     setTargetDAGCombine(ISD::SHL);
 459     setTargetDAGCombine(ISD::SRL);
 460     setTargetDAGCombine(ISD::SRA);
 461     setTargetDAGCombine(ISD::SIGN_EXTEND);
 462     setTargetDAGCombine(ISD::ZERO_EXTEND);
 463     setTargetDAGCombine(ISD::ANY_EXTEND);
 464     setTargetDAGCombine(ISD::SELECT_CC);
 465     setTargetDAGCombine(ISD::BUILD_VECTOR);
 466     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
 467   }
 468
 469   computeRegisterProperties();
 470
 471   // ARM does not have f32 extending load.
 472   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
 473
 474   // ARM does not have i1 sign extending load.
 475   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 476
 477   // ARM supports all 4 flavors of integer indexed load / store.
 478   if (!Subtarget->isThumb1Only()) {
 479     for (unsigned im = (unsigned)ISD::PRE_INC;
 480          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
 481       setIndexedLoadAction(im,  MVT::i1,  Legal);
 482       setIndexedLoadAction(im,  MVT::i8,  Legal);
 483       setIndexedLoadAction(im,  MVT::i16, Legal);
 484       setIndexedLoadAction(im,  MVT::i32, Legal);
 485       setIndexedStoreAction(im, MVT::i1,  Legal);
 486       setIndexedStoreAction(im, MVT::i8,  Legal);
 487       setIndexedStoreAction(im, MVT::i16, Legal);
 488       setIndexedStoreAction(im, MVT::i32, Legal);
 489     }
 490   }
 491
 492   // i64 operation support.
 493   if (Subtarget->isThumb1Only()) {
 494     setOperationAction(ISD::MUL,     MVT::i64, Expand);
 495     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
 496     setOperationAction(ISD::MULHS,   MVT::i32, Expand);
 497     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 498     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 499   } else {
 500     setOperationAction(ISD::MUL,     MVT::i64, Expand);
 501     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
 502     if (!Subtarget->hasV6Ops())
 503       setOperationAction(ISD::MULHS, MVT::i32, Expand);
 504   }
 505   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 506   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 507   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 508   setOperationAction(ISD::SRL,       MVT::i64, Custom);
 509   setOperationAction(ISD::SRA,       MVT::i64, Custom);
 510
 511   // ARM does not have ROTL.
 512   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
 513   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
 514   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
 515   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
 516     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
 517
 518   // Only ARMv6 has BSWAP.
 519   if (!Subtarget->hasV6Ops())
 520     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 521
 522   // These are expanded into libcalls.
 523   if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
 524     // v7M has a hardware divider
 525     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
 526     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
 527   }
 528   setOperationAction(ISD::SREM,  MVT::i32, Expand);
 529   setOperationAction(ISD::UREM,  MVT::i32, Expand);
 530   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 531   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 532
 533   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
 534   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
 535   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
 536   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 537   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
 538
 539   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 540
 541   // Use the default implementation.
 542   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
 543   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
 544   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
 545   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
 546   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
 547   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
 548   setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
 549   // FIXME: Shouldn't need this, since no register is used, but the legalizer
 550   // doesn't yet know how to not do that for SjLj.
 551   setExceptionSelectorRegister(ARM::R0);
 552   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 553   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
 554   // the default expansion.
 555   if (Subtarget->hasDataBarrier() ||
 556       (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
 557     // membarrier needs custom lowering; the rest are legal and handled
 558     // normally.
 559     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
 560   } else {
 561     // Set them all for expansion, which will force libcalls.
 562     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 563     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
 564     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
 565     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
 566     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
 567     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
 568     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
 569     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
 570     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
 571     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
 572     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
 573     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
 574     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
 575     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
 576     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
 577     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
 578     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
 579     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
 580     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
 581     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
 582     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
 583     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
 584     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
 585     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
 586     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
 587     // Since the libcalls include locking, fold in the fences
 588     setShouldFoldAtomicFences(true);
 589   }
 590   // 64-bit versions are always libcalls (for now)
 591   setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
 592   setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
 593   setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
 594   setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
 595   setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
 596   setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
 597   setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
 598   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
 599
 600   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
 601
 602   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
 603   if (!Subtarget->hasV6Ops()) {
 604     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
 605     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
 606   }
 607   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 608
 609   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 610     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
 611     // iff target supports vfp2.
 612     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
 613     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 614   }
 615
 616   // We want to custom lower some of our intrinsics.
 617   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 618   if (Subtarget->isTargetDarwin()) {
 619     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 620     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 621     setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
 622   }
 623
 624   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
 625   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
 626   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
 627   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
 628   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
 629   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
 630   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 631   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 632   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 633
 634   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
 635   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
 636   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
 637   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
 638   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
 639
 640   // We don't support sin/cos/fmod/copysign/pow
 641   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
 642   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
 643   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
 644   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
 645   setOperationAction(ISD::FREM,      MVT::f64, Expand);
 646   setOperationAction(ISD::FREM,      MVT::f32, Expand);
 647   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 648     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
 649     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
 650   }
 651   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
 652   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 653
 654   // Various VFP goodness
 655   if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
 656     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
 657     if (Subtarget->hasVFP2()) {
 658       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 659       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 660       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 661       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 662     }
 663     // Special handling for half-precision FP.
 664     if (!Subtarget->hasFP16()) {
 665       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
 666       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
 667     }
 668   }
 669
 670   // We have target-specific dag combine patterns for the following nodes:
 671   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
 672   setTargetDAGCombine(ISD::ADD);
 673   setTargetDAGCombine(ISD::SUB);
 674   setTargetDAGCombine(ISD::MUL);
 675
 676   if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
 677     setTargetDAGCombine(ISD::OR);
 678   if (Subtarget->hasNEON())
 679     setTargetDAGCombine(ISD::AND);
 680
 681   setStackPointerRegisterToSaveRestore(ARM::SP);
 682
 683   if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
 684     setSchedulingPreference(Sched::RegPressure);
 685   else
 686     setSchedulingPreference(Sched::Hybrid);
 687
 688   maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
 689
 690   // On ARM arguments smaller than 4 bytes are extended, so all arguments
 691   // are at least 4 bytes aligned.
 692   setMinStackArgumentAlignment(4);
 693
 694   benefitFromCodePlacementOpt = true;
 695 }
 696
 697 std::pair<const TargetRegisterClass*, uint8_t>
 698 ARMTargetLowering::findRepresentativeClass(EVT VT) const{
 699   const TargetRegisterClass *RRC = 0;
 700   uint8_t Cost = 1;
 701   switch (VT.getSimpleVT().SimpleTy) {
 702   default:
 703     return TargetLowering::findRepresentativeClass(VT);
 704   // Use DPR as representative register class for all floating point
 705   // and vector types. Since there are 32 SPR registers and 32 DPR registers so
 706   // the cost is 1 for both f32 and f64.
 707   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
 708   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
 709     RRC = ARM::DPRRegisterClass;
 710     break;
 711   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
 712   case MVT::v4f32: case MVT::v2f64:
 713     RRC = ARM::DPRRegisterClass;
 714     Cost = 2;
 715     break;
 716   case MVT::v4i64:
 717     RRC = ARM::DPRRegisterClass;
 718     Cost = 4;
 719     break;
 720   case MVT::v8i64:
 721     RRC = ARM::DPRRegisterClass;
 722     Cost = 8;
 723     break;
 724   }
 725   return std::make_pair(RRC, Cost);
 726 }
 727
 728 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 729   switch (Opcode) {
 730   default: return 0;
 731   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
 732   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
 733   case ARMISD::CALL:          return "ARMISD::CALL";
 734   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
 735   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
 736   case ARMISD::tCALL:         return "ARMISD::tCALL";
 737   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
 738   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
 739   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
 740   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
 741   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
 742   case ARMISD::CMP:           return "ARMISD::CMP";
 743   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
 744   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
 745   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
 746   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
 747   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
 748   case ARMISD::CMOV:          return "ARMISD::CMOV";
 749   case ARMISD::CNEG:          return "ARMISD::CNEG";
 750
 751   case ARMISD::RBIT:          return "ARMISD::RBIT";
 752
 753   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
 754   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
 755   case ARMISD::SITOF:         return "ARMISD::SITOF";
 756   case ARMISD::UITOF:         return "ARMISD::UITOF";
 757
 758   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
 759   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
 760   case ARMISD::RRX:           return "ARMISD::RRX";
 761
 762   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
 763   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
 764
 765   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
 766   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
 767   case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
 768
 769   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
 770
 771   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
 772
 773   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
 774
 775   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
 776   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
 777
 778   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
 779
 780   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
 781   case ARMISD::VCGE:          return "ARMISD::VCGE";
 782   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
 783   case ARMISD::VCGT:          return "ARMISD::VCGT";
 784   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
 785   case ARMISD::VTST:          return "ARMISD::VTST";
 786
 787   case ARMISD::VSHL:          return "ARMISD::VSHL";
 788   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
 789   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
 790   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
 791   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
 792   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
 793   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
 794   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
 795   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
 796   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
 797   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
 798   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
 799   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
 800   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
 801   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
 802   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
 803   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
 804   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
 805   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
 806   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
 807   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
 808   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
 809   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
 810   case ARMISD::VDUP:          return "ARMISD::VDUP";
 811   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
 812   case ARMISD::VEXT:          return "ARMISD::VEXT";
 813   case ARMISD::VREV64:        return "ARMISD::VREV64";
 814   case ARMISD::VREV32:        return "ARMISD::VREV32";
 815   case ARMISD::VREV16:        return "ARMISD::VREV16";
 816   case ARMISD::VZIP:          return "ARMISD::VZIP";
 817   case ARMISD::VUZP:          return "ARMISD::VUZP";
 818   case ARMISD::VTRN:          return "ARMISD::VTRN";
 819   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
 820   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
 821   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
 822   case ARMISD::FMAX:          return "ARMISD::FMAX";
 823   case ARMISD::FMIN:          return "ARMISD::FMIN";
 824   case ARMISD::BFI:           return "ARMISD::BFI";
 825   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
 826   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
 827   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
 828   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
 829   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
 830   }
 831 }
 832
 833 /// getRegClassFor - Return the register class that should be used for the
 834 /// specified value type.
 835 TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
 836   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
 837   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
 838   // load / store 4 to 8 consecutive D registers.
 839   if (Subtarget->hasNEON()) {
 840     if (VT == MVT::v4i64)
 841       return ARM::QQPRRegisterClass;
 842     else if (VT == MVT::v8i64)
 843       return ARM::QQQQPRRegisterClass;
 844   }
 845   return TargetLowering::getRegClassFor(VT);
 846 }
 847
 848 // Create a fast isel object.
 849 FastISel *
 850 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
 851   return ARM::createFastISel(funcInfo);
 852 }
 853
 854 /// getFunctionAlignment - Return the Log2 alignment of this function.
 855 unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
 856   return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
 857 }
 858
 859 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
 860 /// be used for loads / stores from the global.
 861 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
 862   return (Subtarget->isThumb1Only() ? 127 : 4095);
 863 }
 864
 865 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
 866   unsigned NumVals = N->getNumValues();
 867   if (!NumVals)
 868     return Sched::RegPressure;
 869
 870   for (unsigned i = 0; i != NumVals; ++i) {
 871     EVT VT = N->getValueType(i);
 872     if (VT == MVT::Flag || VT == MVT::Other)
 873       continue;
 874     if (VT.isFloatingPoint() || VT.isVector())
 875       return Sched::Latency;
 876   }
 877
 878   if (!N->isMachineOpcode())
 879     return Sched::RegPressure;
 880
 881   // Load are scheduled for latency even if there instruction itinerary
 882   // is not available.
 883   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
 884   const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
 885
 886   if (TID.getNumDefs() == 0)
 887     return Sched::RegPressure;
 888   if (!Itins->isEmpty() &&
 889       Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
 890     return Sched::Latency;
 891
 892   return Sched::RegPressure;
 893 }
 894
 895 unsigned
 896 ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
 897                                        MachineFunction &MF) const {
 898   const TargetFrameInfo *TFI = MF.getTarget().getFrameInfo();
 899
 900   switch (RC->getID()) {
 901   default:
 902     return 0;
 903   case ARM::tGPRRegClassID:
 904     return TFI->hasFP(MF) ? 4 : 5;
 905   case ARM::GPRRegClassID: {
 906     unsigned FP = TFI->hasFP(MF) ? 1 : 0;
 907     return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
 908   }
 909   case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
 910   case ARM::DPRRegClassID:
 911     return 32 - 10;
 912   }
 913 }
 914
 915 //===----------------------------------------------------------------------===//
 916 // Lowering Code
 917 //===----------------------------------------------------------------------===//
 918
 919 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
 920 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
 921   switch (CC) {
 922   default: llvm_unreachable("Unknown condition code!");
 923   case ISD::SETNE:  return ARMCC::NE;
 924   case ISD::SETEQ:  return ARMCC::EQ;
 925   case ISD::SETGT:  return ARMCC::GT;
 926   case ISD::SETGE:  return ARMCC::GE;
 927   case ISD::SETLT:  return ARMCC::LT;
 928   case ISD::SETLE:  return ARMCC::LE;
 929   case ISD::SETUGT: return ARMCC::HI;
 930   case ISD::SETUGE: return ARMCC::HS;
 931   case ISD::SETULT: return ARMCC::LO;
 932   case ISD::SETULE: return ARMCC::LS;
 933   }
 934 }
 935
 936 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
 937 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 938                         ARMCC::CondCodes &CondCode2) {
 939   CondCode2 = ARMCC::AL;
 940   switch (CC) {
 941   default: llvm_unreachable("Unknown FP condition!");
 942   case ISD::SETEQ:
 943   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
 944   case ISD::SETGT:
 945   case ISD::SETOGT: CondCode = ARMCC::GT; break;
 946   case ISD::SETGE:
 947   case ISD::SETOGE: CondCode = ARMCC::GE; break;
 948   case ISD::SETOLT: CondCode = ARMCC::MI; break;
 949   case ISD::SETOLE: CondCode = ARMCC::LS; break;
 950   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
 951   case ISD::SETO:   CondCode = ARMCC::VC; break;
 952   case ISD::SETUO:  CondCode = ARMCC::VS; break;
 953   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
 954   case ISD::SETUGT: CondCode = ARMCC::HI; break;
 955   case ISD::SETUGE: CondCode = ARMCC::PL; break;
 956   case ISD::SETLT:
 957   case ISD::SETULT: CondCode = ARMCC::LT; break;
 958   case ISD::SETLE:
 959   case ISD::SETULE: CondCode = ARMCC::LE; break;
 960   case ISD::SETNE:
 961   case ISD::SETUNE: CondCode = ARMCC::NE; break;
 962   }
 963 }
 964
 965 //===----------------------------------------------------------------------===//
 966 //                      Calling Convention Implementation
 967 //===----------------------------------------------------------------------===//
 968
 969 #include "ARMGenCallingConv.inc"
 970
 971 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 972 /// given CallingConvention value.
 973 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
 974                                                  bool Return,
 975                                                  bool isVarArg) const {
 976   switch (CC) {
 977   default:
 978     llvm_unreachable("Unsupported calling convention");
 979   case CallingConv::Fast:
 980     if (Subtarget->hasVFP2() && !isVarArg) {
 981       if (!Subtarget->isAAPCS_ABI())
 982         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
 983       // For AAPCS ABI targets, just use VFP variant of the calling convention.
 984       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
 985     }
 986     // Fallthrough
 987   case CallingConv::C: {
 988     // Use target triple & subtarget features to do actual dispatch.
 989     if (!Subtarget->isAAPCS_ABI())
 990       return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
 991     else if (Subtarget->hasVFP2() &&
 992              FloatABIType == FloatABI::Hard && !isVarArg)
 993       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
 994     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
 995   }
 996   case CallingConv::ARM_AAPCS_VFP:
 997     return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
 998   case CallingConv::ARM_AAPCS:
 999     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1000   case CallingConv::ARM_APCS:
1001     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1002   }
1003 }
1004
1005 /// LowerCallResult - Lower the result values of a call into the
1006 /// appropriate copies out of appropriate physical registers.
1007 SDValue
1008 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1009                                    CallingConv::ID CallConv, bool isVarArg,
1010                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1011                                    DebugLoc dl, SelectionDAG &DAG,
1012                                    SmallVectorImpl<SDValue> &InVals) const {
1013
1014   // Assign locations to each value returned by this call.
1015   SmallVector<CCValAssign, 16> RVLocs;
1016   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1017                  RVLocs, *DAG.getContext());
1018   CCInfo.AnalyzeCallResult(Ins,
1019                            CCAssignFnForNode(CallConv, /* Return*/ true,
1020                                              isVarArg));
1021
1022   // Copy all of the result registers out of their specified physreg.
1023   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1024     CCValAssign VA = RVLocs[i];
1025
1026     SDValue Val;
1027     if (VA.needsCustom()) {
1028       // Handle f64 or half of a v2f64.
1029       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1030                                       InFlag);
1031       Chain = Lo.getValue(1);
1032       InFlag = Lo.getValue(2);
1033       VA = RVLocs[++i]; // skip ahead to next loc
1034       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1035                                       InFlag);
1036       Chain = Hi.getValue(1);
1037       InFlag = Hi.getValue(2);
1038       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1039
1040       if (VA.getLocVT() == MVT::v2f64) {
1041         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1042         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1043                           DAG.getConstant(0, MVT::i32));
1044
1045         VA = RVLocs[++i]; // skip ahead to next loc
1046         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1047         Chain = Lo.getValue(1);
1048         InFlag = Lo.getValue(2);
1049         VA = RVLocs[++i]; // skip ahead to next loc
1050         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1051         Chain = Hi.getValue(1);
1052         InFlag = Hi.getValue(2);
1053         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1054         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1055                           DAG.getConstant(1, MVT::i32));
1056       }
1057     } else {
1058       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1059                                InFlag);
1060       Chain = Val.getValue(1);
1061       InFlag = Val.getValue(2);
1062     }
1063
1064     switch (VA.getLocInfo()) {
1065     default: llvm_unreachable("Unknown loc info!");
1066     case CCValAssign::Full: break;
1067     case CCValAssign::BCvt:
1068       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1069       break;
1070     }
1071
1072     InVals.push_back(Val);
1073   }
1074
1075   return Chain;
1076 }
1077
1078 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1079 /// by "Src" to address "Dst" of size "Size".  Alignment information is
1080 /// specified by the specific parameter attribute.  The copy will be passed as
1081 /// a byval function parameter.
1082 /// Sometimes what we are copying is the end of a larger object, the part that
1083 /// does not fit in registers.
1084 static SDValue
1085 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1086                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
1087                           DebugLoc dl) {
1088   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1089   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
1090                        /*isVolatile=*/false, /*AlwaysInline=*/false,
1091                        MachinePointerInfo(0), MachinePointerInfo(0));
1092 }
1093
1094 /// LowerMemOpCallTo - Store the argument to the stack.
1095 SDValue
1096 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1097                                     SDValue StackPtr, SDValue Arg,
1098                                     DebugLoc dl, SelectionDAG &DAG,
1099                                     const CCValAssign &VA,
1100                                     ISD::ArgFlagsTy Flags) const {
1101   unsigned LocMemOffset = VA.getLocMemOffset();
1102   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1103   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1104   if (Flags.isByVal())
1105     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1106
1107   return DAG.getStore(Chain, dl, Arg, PtrOff,
1108                       MachinePointerInfo::getStack(LocMemOffset),
1109                       false, false, 0);
1110 }
1111
1112 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
1113                                          SDValue Chain, SDValue &Arg,
1114                                          RegsToPassVector &RegsToPass,
1115                                          CCValAssign &VA, CCValAssign &NextVA,
1116                                          SDValue &StackPtr,
1117                                          SmallVector<SDValue, 8> &MemOpChains,
1118                                          ISD::ArgFlagsTy Flags) const {
1119
1120   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1121                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
1122   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1123
1124   if (NextVA.isRegLoc())
1125     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1126   else {
1127     assert(NextVA.isMemLoc());
1128     if (StackPtr.getNode() == 0)
1129       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1130
1131     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1132                                            dl, DAG, NextVA,
1133                                            Flags));
1134   }
1135 }
1136
1137 /// LowerCall - Lowering a call into a callseq_start <-
1138 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
1139 /// nodes.
1140 SDValue
1141 ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1142                              CallingConv::ID CallConv, bool isVarArg,
1143                              bool &isTailCall,
1144                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1145                              const SmallVectorImpl<SDValue> &OutVals,
1146                              const SmallVectorImpl<ISD::InputArg> &Ins,
1147                              DebugLoc dl, SelectionDAG &DAG,
1148                              SmallVectorImpl<SDValue> &InVals) const {
1149   MachineFunction &MF = DAG.getMachineFunction();
1150   bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1151   bool IsSibCall = false;
1152   // Temporarily disable tail calls so things don't break.
1153   if (!EnableARMTailCalls)
1154     isTailCall = false;
1155   if (isTailCall) {
1156     // Check if it's really possible to do a tail call.
1157     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1158                     isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1159                                                    Outs, OutVals, Ins, DAG);
1160     // We don't support GuaranteedTailCallOpt for ARM, only automatically
1161     // detected sibcalls.
1162     if (isTailCall) {
1163       ++NumTailCalls;
1164       IsSibCall = true;
1165     }
1166   }
1167
1168   // Analyze operands of the call, assigning locations to each operand.
1169   SmallVector<CCValAssign, 16> ArgLocs;
1170   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1171                  *DAG.getContext());
1172   CCInfo.AnalyzeCallOperands(Outs,
1173                              CCAssignFnForNode(CallConv, /* Return*/ false,
1174                                                isVarArg));
1175
1176   // Get a count of how many bytes are to be pushed on the stack.
1177   unsigned NumBytes = CCInfo.getNextStackOffset();
1178
1179   // For tail calls, memory operands are available in our caller's stack.
1180   if (IsSibCall)
1181     NumBytes = 0;
1182
1183   // Adjust the stack pointer for the new arguments...
1184   // These operations are automatically eliminated by the prolog/epilog pass
1185   if (!IsSibCall)
1186     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1187
1188   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1189
1190   RegsToPassVector RegsToPass;
1191   SmallVector<SDValue, 8> MemOpChains;
1192
1193   // Walk the register/memloc assignments, inserting copies/loads.  In the case
1194   // of tail call optimization, arguments are handled later.
1195   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1196        i != e;
1197        ++i, ++realArgIdx) {
1198     CCValAssign &VA = ArgLocs[i];
1199     SDValue Arg = OutVals[realArgIdx];
1200     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1201
1202     // Promote the value if needed.
1203     switch (VA.getLocInfo()) {
1204     default: llvm_unreachable("Unknown loc info!");
1205     case CCValAssign::Full: break;
1206     case CCValAssign::SExt:
1207       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1208       break;
1209     case CCValAssign::ZExt:
1210       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1211       break;
1212     case CCValAssign::AExt:
1213       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1214       break;
1215     case CCValAssign::BCvt:
1216       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1217       break;
1218     }
1219
1220     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1221     if (VA.needsCustom()) {
1222       if (VA.getLocVT() == MVT::v2f64) {
1223         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1224                                   DAG.getConstant(0, MVT::i32));
1225         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1226                                   DAG.getConstant(1, MVT::i32));
1227
1228         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1229                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1230
1231         VA = ArgLocs[++i]; // skip ahead to next loc
1232         if (VA.isRegLoc()) {
1233           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1234                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1235         } else {
1236           assert(VA.isMemLoc());
1237
1238           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1239                                                  dl, DAG, VA, Flags));
1240         }
1241       } else {
1242         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1243                          StackPtr, MemOpChains, Flags);
1244       }
1245     } else if (VA.isRegLoc()) {
1246       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1247     } else if (!IsSibCall) {
1248       assert(VA.isMemLoc());
1249
1250       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1251                                              dl, DAG, VA, Flags));
1252     }
1253   }
1254
1255   if (!MemOpChains.empty())
1256     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1257                         &MemOpChains[0], MemOpChains.size());
1258
1259   // Build a sequence of copy-to-reg nodes chained together with token chain
1260   // and flag operands which copy the outgoing args into the appropriate regs.
1261   SDValue InFlag;
1262   // Tail call byval lowering might overwrite argument registers so in case of
1263   // tail call optimization the copies to registers are lowered later.
1264   if (!isTailCall)
1265     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1266       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1267                                RegsToPass[i].second, InFlag);
1268       InFlag = Chain.getValue(1);
1269     }
1270
1271   // For tail calls lower the arguments to the 'real' stack slot.
1272   if (isTailCall) {
1273     // Force all the incoming stack arguments to be loaded from the stack
1274     // before any new outgoing arguments are stored to the stack, because the
1275     // outgoing stack slots may alias the incoming argument stack slots, and
1276     // the alias isn't otherwise explicit. This is slightly more conservative
1277     // than necessary, because it means that each store effectively depends
1278     // on every argument instead of just those arguments it would clobber.
1279
1280     // Do not flag preceeding copytoreg stuff together with the following stuff.
1281     InFlag = SDValue();
1282     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1283       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1284                                RegsToPass[i].second, InFlag);
1285       InFlag = Chain.getValue(1);
1286     }
1287     InFlag =SDValue();
1288   }
1289
1290   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1291   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1292   // node so that legalize doesn't hack it.
1293   bool isDirect = false;
1294   bool isARMFunc = false;
1295   bool isLocalARMFunc = false;
1296   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1297
1298   if (EnableARMLongCalls) {
1299     assert (getTargetMachine().getRelocationModel() == Reloc::Static
1300             && "long-calls with non-static relocation model!");
1301     // Handle a global address or an external symbol. If it's not one of
1302     // those, the target's already in a register, so we don't need to do
1303     // anything extra.
1304     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1305       const GlobalValue *GV = G->getGlobal();
1306       // Create a constant pool entry for the callee address
1307       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1308       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1309                                                            ARMPCLabelIndex,
1310                                                            ARMCP::CPValue, 0);
1311       // Get the address of the callee into a register
1312       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1313       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1314       Callee = DAG.getLoad(getPointerTy(), dl,
1315                            DAG.getEntryNode(), CPAddr,
1316                            MachinePointerInfo::getConstantPool(),
1317                            false, false, 0);
1318     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1319       const char *Sym = S->getSymbol();
1320
1321       // Create a constant pool entry for the callee address
1322       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1323       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1324                                                        Sym, ARMPCLabelIndex, 0);
1325       // Get the address of the callee into a register
1326       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1327       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1328       Callee = DAG.getLoad(getPointerTy(), dl,
1329                            DAG.getEntryNode(), CPAddr,
1330                            MachinePointerInfo::getConstantPool(),
1331                            false, false, 0);
1332     }
1333   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1334     const GlobalValue *GV = G->getGlobal();
1335     isDirect = true;
1336     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1337     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1338                    getTargetMachine().getRelocationModel() != Reloc::Static;
1339     isARMFunc = !Subtarget->isThumb() || isStub;
1340     // ARM call to a local ARM function is predicable.
1341     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1342     // tBX takes a register source operand.
1343     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1344       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1345       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1346                                                            ARMPCLabelIndex,
1347                                                            ARMCP::CPValue, 4);
1348       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1349       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1350       Callee = DAG.getLoad(getPointerTy(), dl,
1351                            DAG.getEntryNode(), CPAddr,
1352                            MachinePointerInfo::getConstantPool(),
1353                            false, false, 0);
1354       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1355       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1356                            getPointerTy(), Callee, PICLabel);
1357     } else {
1358       // On ELF targets for PIC code, direct calls should go through the PLT
1359       unsigned OpFlags = 0;
1360       if (Subtarget->isTargetELF() &&
1361                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
1362         OpFlags = ARMII::MO_PLT;
1363       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
1364     }
1365   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1366     isDirect = true;
1367     bool isStub = Subtarget->isTargetDarwin() &&
1368                   getTargetMachine().getRelocationModel() != Reloc::Static;
1369     isARMFunc = !Subtarget->isThumb() || isStub;
1370     // tBX takes a register source operand.
1371     const char *Sym = S->getSymbol();
1372     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1373       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1374       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1375                                                        Sym, ARMPCLabelIndex, 4);
1376       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1377       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1378       Callee = DAG.getLoad(getPointerTy(), dl,
1379                            DAG.getEntryNode(), CPAddr,
1380                            MachinePointerInfo::getConstantPool(),
1381                            false, false, 0);
1382       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1383       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1384                            getPointerTy(), Callee, PICLabel);
1385     } else {
1386       unsigned OpFlags = 0;
1387       // On ELF targets for PIC code, direct calls should go through the PLT
1388       if (Subtarget->isTargetELF() &&
1389                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
1390         OpFlags = ARMII::MO_PLT;
1391       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1392     }
1393   }
1394
1395   // FIXME: handle tail calls differently.
1396   unsigned CallOpc;
1397   if (Subtarget->isThumb()) {
1398     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1399       CallOpc = ARMISD::CALL_NOLINK;
1400     else
1401       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1402   } else {
1403     CallOpc = (isDirect || Subtarget->hasV5TOps())
1404       ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1405       : ARMISD::CALL_NOLINK;
1406   }
1407
1408   std::vector<SDValue> Ops;
1409   Ops.push_back(Chain);
1410   Ops.push_back(Callee);
1411
1412   // Add argument registers to the end of the list so that they are known live
1413   // into the call.
1414   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1415     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1416                                   RegsToPass[i].second.getValueType()));
1417
1418   if (InFlag.getNode())
1419     Ops.push_back(InFlag);
1420
1421   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1422   if (isTailCall)
1423     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1424
1425   // Returns a chain and a flag for retval copy to use.
1426   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1427   InFlag = Chain.getValue(1);
1428
1429   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1430                              DAG.getIntPtrConstant(0, true), InFlag);
1431   if (!Ins.empty())
1432     InFlag = Chain.getValue(1);
1433
1434   // Handle result values, copying them out of physregs into vregs that we
1435   // return.
1436   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1437                          dl, DAG, InVals);
1438 }
1439
1440 /// MatchingStackOffset - Return true if the given stack call argument is
1441 /// already available in the same position (relatively) of the caller's
1442 /// incoming argument stack.
1443 static
1444 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1445                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1446                          const ARMInstrInfo *TII) {
1447   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1448   int FI = INT_MAX;
1449   if (Arg.getOpcode() == ISD::CopyFromReg) {
1450     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1451     if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
1452       return false;
1453     MachineInstr *Def = MRI->getVRegDef(VR);
1454     if (!Def)
1455       return false;
1456     if (!Flags.isByVal()) {
1457       if (!TII->isLoadFromStackSlot(Def, FI))
1458         return false;
1459     } else {
1460       return false;
1461     }
1462   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1463     if (Flags.isByVal())
1464       // ByVal argument is passed in as a pointer but it's now being
1465       // dereferenced. e.g.
1466       // define @foo(%struct.X* %A) {
1467       //   tail call @bar(%struct.X* byval %A)
1468       // }
1469       return false;
1470     SDValue Ptr = Ld->getBasePtr();
1471     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1472     if (!FINode)
1473       return false;
1474     FI = FINode->getIndex();
1475   } else
1476     return false;
1477
1478   assert(FI != INT_MAX);
1479   if (!MFI->isFixedObjectIndex(FI))
1480     return false;
1481   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1482 }
1483
1484 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1485 /// for tail call optimization. Targets which want to do tail call
1486 /// optimization should implement this function.
1487 bool
1488 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1489                                                      CallingConv::ID CalleeCC,
1490                                                      bool isVarArg,
1491                                                      bool isCalleeStructRet,
1492                                                      bool isCallerStructRet,
1493                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1494                                     const SmallVectorImpl<SDValue> &OutVals,
1495                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1496                                                      SelectionDAG& DAG) const {
1497   const Function *CallerF = DAG.getMachineFunction().getFunction();
1498   CallingConv::ID CallerCC = CallerF->getCallingConv();
1499   bool CCMatch = CallerCC == CalleeCC;
1500
1501   // Look for obvious safe cases to perform tail call optimization that do not
1502   // require ABI changes. This is what gcc calls sibcall.
1503
1504   // Do not sibcall optimize vararg calls unless the call site is not passing
1505   // any arguments.
1506   if (isVarArg && !Outs.empty())
1507     return false;
1508
1509   // Also avoid sibcall optimization if either caller or callee uses struct
1510   // return semantics.
1511   if (isCalleeStructRet || isCallerStructRet)
1512     return false;
1513
1514   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1515   // emitEpilogue is not ready for them.
1516   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1517   // LR.  This means if we need to reload LR, it takes an extra instructions,
1518   // which outweighs the value of the tail call; but here we don't know yet
1519   // whether LR is going to be used.  Probably the right approach is to
1520   // generate the tail call here and turn it back into CALL/RET in
1521   // emitEpilogue if LR is used.
1522   if (Subtarget->isThumb1Only())
1523     return false;
1524
1525   // For the moment, we can only do this to functions defined in this
1526   // compilation, or to indirect calls.  A Thumb B to an ARM function,
1527   // or vice versa, is not easily fixed up in the linker unlike BL.
1528   // (We could do this by loading the address of the callee into a register;
1529   // that is an extra instruction over the direct call and burns a register
1530   // as well, so is not likely to be a win.)
1531
1532   // It might be safe to remove this restriction on non-Darwin.
1533
1534   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1535   // but we need to make sure there are enough registers; the only valid
1536   // registers are the 4 used for parameters.  We don't currently do this
1537   // case.
1538   if (isa<ExternalSymbolSDNode>(Callee))
1539       return false;
1540
1541   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1542     const GlobalValue *GV = G->getGlobal();
1543     if (GV->isDeclaration() || GV->isWeakForLinker())
1544       return false;
1545   }
1546
1547   // If the calling conventions do not match, then we'd better make sure the
1548   // results are returned in the same way as what the caller expects.
1549   if (!CCMatch) {
1550     SmallVector<CCValAssign, 16> RVLocs1;
1551     CCState CCInfo1(CalleeCC, false, getTargetMachine(),
1552                     RVLocs1, *DAG.getContext());
1553     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1554
1555     SmallVector<CCValAssign, 16> RVLocs2;
1556     CCState CCInfo2(CallerCC, false, getTargetMachine(),
1557                     RVLocs2, *DAG.getContext());
1558     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1559
1560     if (RVLocs1.size() != RVLocs2.size())
1561       return false;
1562     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1563       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1564         return false;
1565       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1566         return false;
1567       if (RVLocs1[i].isRegLoc()) {
1568         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1569           return false;
1570       } else {
1571         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1572           return false;
1573       }
1574     }
1575   }
1576
1577   // If the callee takes no arguments then go on to check the results of the
1578   // call.
1579   if (!Outs.empty()) {
1580     // Check if stack adjustment is needed. For now, do not do this if any
1581     // argument is passed on the stack.
1582     SmallVector<CCValAssign, 16> ArgLocs;
1583     CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
1584                    ArgLocs, *DAG.getContext());
1585     CCInfo.AnalyzeCallOperands(Outs,
1586                                CCAssignFnForNode(CalleeCC, false, isVarArg));
1587     if (CCInfo.getNextStackOffset()) {
1588       MachineFunction &MF = DAG.getMachineFunction();
1589
1590       // Check if the arguments are already laid out in the right way as
1591       // the caller's fixed stack objects.
1592       MachineFrameInfo *MFI = MF.getFrameInfo();
1593       const MachineRegisterInfo *MRI = &MF.getRegInfo();
1594       const ARMInstrInfo *TII =
1595         ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1596       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1597            i != e;
1598            ++i, ++realArgIdx) {
1599         CCValAssign &VA = ArgLocs[i];
1600         EVT RegVT = VA.getLocVT();
1601         SDValue Arg = OutVals[realArgIdx];
1602         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1603         if (VA.getLocInfo() == CCValAssign::Indirect)
1604           return false;
1605         if (VA.needsCustom()) {
1606           // f64 and vector types are split into multiple registers or
1607           // register/stack-slot combinations.  The types will not match
1608           // the registers; give up on memory f64 refs until we figure
1609           // out what to do about this.
1610           if (!VA.isRegLoc())
1611             return false;
1612           if (!ArgLocs[++i].isRegLoc())
1613             return false;
1614           if (RegVT == MVT::v2f64) {
1615             if (!ArgLocs[++i].isRegLoc())
1616               return false;
1617             if (!ArgLocs[++i].isRegLoc())
1618               return false;
1619           }
1620         } else if (!VA.isRegLoc()) {
1621           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1622                                    MFI, MRI, TII))
1623             return false;
1624         }
1625       }
1626     }
1627   }
1628
1629   return true;
1630 }
1631
1632 SDValue
1633 ARMTargetLowering::LowerReturn(SDValue Chain,
1634                                CallingConv::ID CallConv, bool isVarArg,
1635                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1636                                const SmallVectorImpl<SDValue> &OutVals,
1637                                DebugLoc dl, SelectionDAG &DAG) const {
1638
1639   // CCValAssign - represent the assignment of the return value to a location.
1640   SmallVector<CCValAssign, 16> RVLocs;
1641
1642   // CCState - Info about the registers and stack slots.
1643   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1644                  *DAG.getContext());
1645
1646   // Analyze outgoing return values.
1647   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1648                                                isVarArg));
1649
1650   // If this is the first return lowered for this function, add
1651   // the regs to the liveout set for the function.
1652   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1653     for (unsigned i = 0; i != RVLocs.size(); ++i)
1654       if (RVLocs[i].isRegLoc())
1655         DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1656   }
1657
1658   SDValue Flag;
1659
1660   // Copy the result values into the output registers.
1661   for (unsigned i = 0, realRVLocIdx = 0;
1662        i != RVLocs.size();
1663        ++i, ++realRVLocIdx) {
1664     CCValAssign &VA = RVLocs[i];
1665     assert(VA.isRegLoc() && "Can only return in registers!");
1666
1667     SDValue Arg = OutVals[realRVLocIdx];
1668
1669     switch (VA.getLocInfo()) {
1670     default: llvm_unreachable("Unknown loc info!");
1671     case CCValAssign::Full: break;
1672     case CCValAssign::BCvt:
1673       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1674       break;
1675     }
1676
1677     if (VA.needsCustom()) {
1678       if (VA.getLocVT() == MVT::v2f64) {
1679         // Extract the first half and return it in two registers.
1680         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1681                                    DAG.getConstant(0, MVT::i32));
1682         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1683                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
1684
1685         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1686         Flag = Chain.getValue(1);
1687         VA = RVLocs[++i]; // skip ahead to next loc
1688         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1689                                  HalfGPRs.getValue(1), Flag);
1690         Flag = Chain.getValue(1);
1691         VA = RVLocs[++i]; // skip ahead to next loc
1692
1693         // Extract the 2nd half and fall through to handle it as an f64 value.
1694         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1695                           DAG.getConstant(1, MVT::i32));
1696       }
1697       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1698       // available.
1699       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1700                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1701       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1702       Flag = Chain.getValue(1);
1703       VA = RVLocs[++i]; // skip ahead to next loc
1704       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1705                                Flag);
1706     } else
1707       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1708
1709     // Guarantee that all emitted copies are
1710     // stuck together, avoiding something bad.
1711     Flag = Chain.getValue(1);
1712   }
1713
1714   SDValue result;
1715   if (Flag.getNode())
1716     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1717   else // Return Void
1718     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1719
1720   return result;
1721 }
1722
1723 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1724 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1725 // one of the above mentioned nodes. It has to be wrapped because otherwise
1726 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1727 // be used to form addressing mode. These wrapped nodes will be selected
1728 // into MOVi.
1729 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1730   EVT PtrVT = Op.getValueType();
1731   // FIXME there is no actual debug info here
1732   DebugLoc dl = Op.getDebugLoc();
1733   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1734   SDValue Res;
1735   if (CP->isMachineConstantPoolEntry())
1736     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1737                                     CP->getAlignment());
1738   else
1739     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1740                                     CP->getAlignment());
1741   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1742 }
1743
1744 unsigned ARMTargetLowering::getJumpTableEncoding() const {
1745   return MachineJumpTableInfo::EK_Inline;
1746 }
1747
1748 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1749                                              SelectionDAG &DAG) const {
1750   MachineFunction &MF = DAG.getMachineFunction();
1751   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1752   unsigned ARMPCLabelIndex = 0;
1753   DebugLoc DL = Op.getDebugLoc();
1754   EVT PtrVT = getPointerTy();
1755   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1756   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1757   SDValue CPAddr;
1758   if (RelocM == Reloc::Static) {
1759     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1760   } else {
1761     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1762     ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1763     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1764                                                          ARMCP::CPBlockAddress,
1765                                                          PCAdj);
1766     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1767   }
1768   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1769   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1770                                MachinePointerInfo::getConstantPool(),
1771                                false, false, 0);
1772   if (RelocM == Reloc::Static)
1773     return Result;
1774   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1775   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1776 }
1777
1778 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
1779 SDValue
1780 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1781                                                  SelectionDAG &DAG) const {
1782   DebugLoc dl = GA->getDebugLoc();
1783   EVT PtrVT = getPointerTy();
1784   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1785   MachineFunction &MF = DAG.getMachineFunction();
1786   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1787   unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1788   ARMConstantPoolValue *CPV =
1789     new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1790                              ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
1791   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1792   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1793   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1794                          MachinePointerInfo::getConstantPool(),
1795                          false, false, 0);
1796   SDValue Chain = Argument.getValue(1);
1797
1798   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1799   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1800
1801   // call __tls_get_addr.
1802   ArgListTy Args;
1803   ArgListEntry Entry;
1804   Entry.Node = Argument;
1805   Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1806   Args.push_back(Entry);
1807   // FIXME: is there useful debug info available here?
1808   std::pair<SDValue, SDValue> CallResult =
1809     LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1810                 false, false, false, false,
1811                 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1812                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1813   return CallResult.first;
1814 }
1815
1816 // Lower ISD::GlobalTLSAddress using the "initial exec" or
1817 // "local exec" model.
1818 SDValue
1819 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1820                                         SelectionDAG &DAG) const {
1821   const GlobalValue *GV = GA->getGlobal();
1822   DebugLoc dl = GA->getDebugLoc();
1823   SDValue Offset;
1824   SDValue Chain = DAG.getEntryNode();
1825   EVT PtrVT = getPointerTy();
1826   // Get the Thread Pointer
1827   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1828
1829   if (GV->isDeclaration()) {
1830     MachineFunction &MF = DAG.getMachineFunction();
1831     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1832     unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1833     // Initial exec model.
1834     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1835     ARMConstantPoolValue *CPV =
1836       new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1837                                ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
1838     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1839     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1840     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1841                          MachinePointerInfo::getConstantPool(),
1842                          false, false, 0);
1843     Chain = Offset.getValue(1);
1844
1845     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1846     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1847
1848     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1849                          MachinePointerInfo::getConstantPool(),
1850                          false, false, 0);
1851   } else {
1852     // local exec model
1853     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
1854     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1855     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1856     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1857                          MachinePointerInfo::getConstantPool(),
1858                          false, false, 0);
1859   }
1860
1861   // The address of the thread local variable is the add of the thread
1862   // pointer with the offset of the variable.
1863   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1864 }
1865
1866 SDValue
1867 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1868   // TODO: implement the "local dynamic" model
1869   assert(Subtarget->isTargetELF() &&
1870          "TLS not implemented for non-ELF targets");
1871   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1872   // If the relocation model is PIC, use the "General Dynamic" TLS Model,
1873   // otherwise use the "Local Exec" TLS Model
1874   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1875     return LowerToTLSGeneralDynamicModel(GA, DAG);
1876   else
1877     return LowerToTLSExecModels(GA, DAG);
1878 }
1879
1880 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1881                                                  SelectionDAG &DAG) const {
1882   EVT PtrVT = getPointerTy();
1883   DebugLoc dl = Op.getDebugLoc();
1884   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1885   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1886   if (RelocM == Reloc::PIC_) {
1887     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1888     ARMConstantPoolValue *CPV =
1889       new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
1890     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1891     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1892     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1893                                  CPAddr,
1894                                  MachinePointerInfo::getConstantPool(),
1895                                  false, false, 0);
1896     SDValue Chain = Result.getValue(1);
1897     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1898     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1899     if (!UseGOTOFF)
1900       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1901                            MachinePointerInfo::getGOT(), false, false, 0);
1902     return Result;
1903   } else {
1904     // If we have T2 ops, we can materialize the address directly via movt/movw
1905     // pair. This is always cheaper.
1906     if (Subtarget->useMovt()) {
1907       return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1908                          DAG.getTargetGlobalAddress(GV, dl, PtrVT));
1909     } else {
1910       SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1911       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1912       return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1913                          MachinePointerInfo::getConstantPool(),
1914                          false, false, 0);
1915     }
1916   }
1917 }
1918
1919 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1920                                                     SelectionDAG &DAG) const {
1921   MachineFunction &MF = DAG.getMachineFunction();
1922   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1923   unsigned ARMPCLabelIndex = 0;
1924   EVT PtrVT = getPointerTy();
1925   DebugLoc dl = Op.getDebugLoc();
1926   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1927   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1928   SDValue CPAddr;
1929   if (RelocM == Reloc::Static)
1930     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1931   else {
1932     ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1933     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1934     ARMConstantPoolValue *CPV =
1935       new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1936     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1937   }
1938   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1939
1940   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1941                                MachinePointerInfo::getConstantPool(),
1942                                false, false, 0);
1943   SDValue Chain = Result.getValue(1);
1944
1945   if (RelocM == Reloc::PIC_) {
1946     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1947     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1948   }
1949
1950   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1951     Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
1952                          false, false, 0);
1953
1954   return Result;
1955 }
1956
1957 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1958                                                     SelectionDAG &DAG) const {
1959   assert(Subtarget->isTargetELF() &&
1960          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1961   MachineFunction &MF = DAG.getMachineFunction();
1962   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1963   unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1964   EVT PtrVT = getPointerTy();
1965   DebugLoc dl = Op.getDebugLoc();
1966   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1967   ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1968                                                        "_GLOBAL_OFFSET_TABLE_",
1969                                                        ARMPCLabelIndex, PCAdj);
1970   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1971   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1972   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1973                                MachinePointerInfo::getConstantPool(),
1974                                false, false, 0);
1975   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1976   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1977 }
1978
1979 SDValue
1980 ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
1981   const {
1982   DebugLoc dl = Op.getDebugLoc();
1983   return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
1984                      Op.getOperand(0), Op.getOperand(1));
1985 }
1986
1987 SDValue
1988 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
1989   DebugLoc dl = Op.getDebugLoc();
1990   SDValue Val = DAG.getConstant(0, MVT::i32);
1991   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
1992                      Op.getOperand(1), Val);
1993 }
1994
1995 SDValue
1996 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1997   DebugLoc dl = Op.getDebugLoc();
1998   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1999                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
2000 }
2001
2002 SDValue
2003 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
2004                                           const ARMSubtarget *Subtarget) const {
2005   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2006   DebugLoc dl = Op.getDebugLoc();
2007   switch (IntNo) {
2008   default: return SDValue();    // Don't custom lower most intrinsics.
2009   case Intrinsic::arm_thread_pointer: {
2010     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2011     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2012   }
2013   case Intrinsic::eh_sjlj_lsda: {
2014     MachineFunction &MF = DAG.getMachineFunction();
2015     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2016     unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
2017     EVT PtrVT = getPointerTy();
2018     DebugLoc dl = Op.getDebugLoc();
2019     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2020     SDValue CPAddr;
2021     unsigned PCAdj = (RelocM != Reloc::PIC_)
2022       ? 0 : (Subtarget->isThumb() ? 4 : 8);
2023     ARMConstantPoolValue *CPV =
2024       new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
2025                                ARMCP::CPLSDA, PCAdj);
2026     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2027     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2028     SDValue Result =
2029       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2030                   MachinePointerInfo::getConstantPool(),
2031                   false, false, 0);
2032
2033     if (RelocM == Reloc::PIC_) {
2034       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2035       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2036     }
2037     return Result;
2038   }
2039   }
2040 }
2041
2042 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2043                                const ARMSubtarget *Subtarget) {
2044   DebugLoc dl = Op.getDebugLoc();
2045   if (!Subtarget->hasDataBarrier()) {
2046     // Some ARMv6 cpus can support data barriers with an mcr instruction.
2047     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2048     // here.
2049     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2050            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2051     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2052                        DAG.getConstant(0, MVT::i32));
2053   }
2054
2055   SDValue Op5 = Op.getOperand(5);
2056   bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
2057   unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2058   unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2059   bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
2060
2061   ARM_MB::MemBOpt DMBOpt;
2062   if (isDeviceBarrier)
2063     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
2064   else
2065     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
2066   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2067                      DAG.getConstant(DMBOpt, MVT::i32));
2068 }
2069
2070 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2071                              const ARMSubtarget *Subtarget) {
2072   // ARM pre v5TE and Thumb1 does not have preload instructions.
2073   if (!(Subtarget->isThumb2() ||
2074         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2075     // Just preserve the chain.
2076     return Op.getOperand(0);
2077
2078   DebugLoc dl = Op.getDebugLoc();
2079   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2080   if (!isRead &&
2081       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2082     // ARMv7 with MP extension has PLDW.
2083     return Op.getOperand(0);
2084
2085   if (Subtarget->isThumb())
2086     // Invert the bits.
2087     isRead = ~isRead & 1;
2088   unsigned isData = Subtarget->isThumb() ? 0 : 1;
2089
2090   // Currently there is no intrinsic that matches pli.
2091   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2092                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
2093                      DAG.getConstant(isData, MVT::i32));
2094 }
2095
2096 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2097   MachineFunction &MF = DAG.getMachineFunction();
2098   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2099
2100   // vastart just stores the address of the VarArgsFrameIndex slot into the
2101   // memory location argument.
2102   DebugLoc dl = Op.getDebugLoc();
2103   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2104   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2105   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2106   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2107                       MachinePointerInfo(SV), false, false, 0);
2108 }
2109
2110 SDValue
2111 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2112                                         SDValue &Root, SelectionDAG &DAG,
2113                                         DebugLoc dl) const {
2114   MachineFunction &MF = DAG.getMachineFunction();
2115   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2116
2117   TargetRegisterClass *RC;
2118   if (AFI->isThumb1OnlyFunction())
2119     RC = ARM::tGPRRegisterClass;
2120   else
2121     RC = ARM::GPRRegisterClass;
2122
2123   // Transform the arguments stored in physical registers into virtual ones.
2124   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2125   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2126
2127   SDValue ArgValue2;
2128   if (NextVA.isMemLoc()) {
2129     MachineFrameInfo *MFI = MF.getFrameInfo();
2130     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2131
2132     // Create load node to retrieve arguments from the stack.
2133     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2134     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2135                             MachinePointerInfo::getFixedStack(FI),
2136                             false, false, 0);
2137   } else {
2138     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2139     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2140   }
2141
2142   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2143 }
2144
2145 SDValue
2146 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2147                                         CallingConv::ID CallConv, bool isVarArg,
2148                                         const SmallVectorImpl<ISD::InputArg>
2149                                           &Ins,
2150                                         DebugLoc dl, SelectionDAG &DAG,
2151                                         SmallVectorImpl<SDValue> &InVals)
2152                                           const {
2153
2154   MachineFunction &MF = DAG.getMachineFunction();
2155   MachineFrameInfo *MFI = MF.getFrameInfo();
2156
2157   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2158
2159   // Assign locations to all of the incoming arguments.
2160   SmallVector<CCValAssign, 16> ArgLocs;
2161   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
2162                  *DAG.getContext());
2163   CCInfo.AnalyzeFormalArguments(Ins,
2164                                 CCAssignFnForNode(CallConv, /* Return*/ false,
2165                                                   isVarArg));
2166
2167   SmallVector<SDValue, 16> ArgValues;
2168
2169   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2170     CCValAssign &VA = ArgLocs[i];
2171
2172     // Arguments stored in registers.
2173     if (VA.isRegLoc()) {
2174       EVT RegVT = VA.getLocVT();
2175
2176       SDValue ArgValue;
2177       if (VA.needsCustom()) {
2178         // f64 and vector types are split up into multiple registers or
2179         // combinations of registers and stack slots.
2180         if (VA.getLocVT() == MVT::v2f64) {
2181           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2182                                                    Chain, DAG, dl);
2183           VA = ArgLocs[++i]; // skip ahead to next loc
2184           SDValue ArgValue2;
2185           if (VA.isMemLoc()) {
2186             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2187             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2188             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2189                                     MachinePointerInfo::getFixedStack(FI),
2190                                     false, false, 0);
2191           } else {
2192             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2193                                              Chain, DAG, dl);
2194           }
2195           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2196           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2197                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2198           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2199                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2200         } else
2201           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2202
2203       } else {
2204         TargetRegisterClass *RC;
2205
2206         if (RegVT == MVT::f32)
2207           RC = ARM::SPRRegisterClass;
2208         else if (RegVT == MVT::f64)
2209           RC = ARM::DPRRegisterClass;
2210         else if (RegVT == MVT::v2f64)
2211           RC = ARM::QPRRegisterClass;
2212         else if (RegVT == MVT::i32)
2213           RC = (AFI->isThumb1OnlyFunction() ?
2214                 ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2215         else
2216           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2217
2218         // Transform the arguments in physical registers into virtual ones.
2219         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2220         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2221       }
2222
2223       // If this is an 8 or 16-bit value, it is really passed promoted
2224       // to 32 bits.  Insert an assert[sz]ext to capture this, then
2225       // truncate to the right size.
2226       switch (VA.getLocInfo()) {
2227       default: llvm_unreachable("Unknown loc info!");
2228       case CCValAssign::Full: break;
2229       case CCValAssign::BCvt:
2230         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
2231         break;
2232       case CCValAssign::SExt:
2233         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2234                                DAG.getValueType(VA.getValVT()));
2235         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2236         break;
2237       case CCValAssign::ZExt:
2238         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2239                                DAG.getValueType(VA.getValVT()));
2240         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2241         break;
2242       }
2243
2244       InVals.push_back(ArgValue);
2245
2246     } else { // VA.isRegLoc()
2247
2248       // sanity check
2249       assert(VA.isMemLoc());
2250       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2251
2252       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
2253       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
2254
2255       // Create load nodes to retrieve arguments from the stack.
2256       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2257       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2258                                    MachinePointerInfo::getFixedStack(FI),
2259                                    false, false, 0));
2260     }
2261   }
2262
2263   // varargs
2264   if (isVarArg) {
2265     static const unsigned GPRArgRegs[] = {
2266       ARM::R0, ARM::R1, ARM::R2, ARM::R3
2267     };
2268
2269     unsigned NumGPRs = CCInfo.getFirstUnallocated
2270       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2271
2272     unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
2273     unsigned VARegSize = (4 - NumGPRs) * 4;
2274     unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2275     unsigned ArgOffset = CCInfo.getNextStackOffset();
2276     if (VARegSaveSize) {
2277       // If this function is vararg, store any remaining integer argument regs
2278       // to their spots on the stack so that they may be loaded by deferencing
2279       // the result of va_next.
2280       AFI->setVarArgsRegSaveSize(VARegSaveSize);
2281       AFI->setVarArgsFrameIndex(
2282         MFI->CreateFixedObject(VARegSaveSize,
2283                                ArgOffset + VARegSaveSize - VARegSize,
2284                                false));
2285       SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2286                                       getPointerTy());
2287
2288       SmallVector<SDValue, 4> MemOps;
2289       for (; NumGPRs < 4; ++NumGPRs) {
2290         TargetRegisterClass *RC;
2291         if (AFI->isThumb1OnlyFunction())
2292           RC = ARM::tGPRRegisterClass;
2293         else
2294           RC = ARM::GPRRegisterClass;
2295
2296         unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
2297         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2298         SDValue Store =
2299           DAG.getStore(Val.getValue(1), dl, Val, FIN,
2300                MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
2301                        false, false, 0);
2302         MemOps.push_back(Store);
2303         FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2304                           DAG.getConstant(4, getPointerTy()));
2305       }
2306       if (!MemOps.empty())
2307         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2308                             &MemOps[0], MemOps.size());
2309     } else
2310       // This will point to the next argument passed via stack.
2311       AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2312   }
2313
2314   return Chain;
2315 }
2316
2317 /// isFloatingPointZero - Return true if this is +0.0.
2318 static bool isFloatingPointZero(SDValue Op) {
2319   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2320     return CFP->getValueAPF().isPosZero();
2321   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2322     // Maybe this has already been legalized into the constant pool?
2323     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2324       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2325       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2326         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2327           return CFP->getValueAPF().isPosZero();
2328     }
2329   }
2330   return false;
2331 }
2332
2333 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
2334 /// the given operands.
2335 SDValue
2336 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2337                              SDValue &ARMcc, SelectionDAG &DAG,
2338                              DebugLoc dl) const {
2339   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2340     unsigned C = RHSC->getZExtValue();
2341     if (!isLegalICmpImmediate(C)) {
2342       // Constant does not fit, try adjusting it by one?
2343       switch (CC) {
2344       default: break;
2345       case ISD::SETLT:
2346       case ISD::SETGE:
2347         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
2348           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2349           RHS = DAG.getConstant(C-1, MVT::i32);
2350         }
2351         break;
2352       case ISD::SETULT:
2353       case ISD::SETUGE:
2354         if (C != 0 && isLegalICmpImmediate(C-1)) {
2355           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2356           RHS = DAG.getConstant(C-1, MVT::i32);
2357         }
2358         break;
2359       case ISD::SETLE:
2360       case ISD::SETGT:
2361         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
2362           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2363           RHS = DAG.getConstant(C+1, MVT::i32);
2364         }
2365         break;
2366       case ISD::SETULE:
2367       case ISD::SETUGT:
2368         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
2369           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2370           RHS = DAG.getConstant(C+1, MVT::i32);
2371         }
2372         break;
2373       }
2374     }
2375   }
2376
2377   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2378   ARMISD::NodeType CompareType;
2379   switch (CondCode) {
2380   default:
2381     CompareType = ARMISD::CMP;
2382     break;
2383   case ARMCC::EQ:
2384   case ARMCC::NE:
2385     // Uses only Z Flag
2386     CompareType = ARMISD::CMPZ;
2387     break;
2388   }
2389   ARMcc = DAG.getConstant(CondCode, MVT::i32);
2390   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2391 }
2392
2393 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2394 SDValue
2395 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2396                              DebugLoc dl) const {
2397   SDValue Cmp;
2398   if (!isFloatingPointZero(RHS))
2399     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
2400   else
2401     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
2402   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
2403 }
2404
2405 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2406   SDValue Cond = Op.getOperand(0);
2407   SDValue SelectTrue = Op.getOperand(1);
2408   SDValue SelectFalse = Op.getOperand(2);
2409   DebugLoc dl = Op.getDebugLoc();
2410
2411   // Convert:
2412   //
2413   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
2414   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
2415   //
2416   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
2417     const ConstantSDNode *CMOVTrue =
2418       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
2419     const ConstantSDNode *CMOVFalse =
2420       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2421
2422     if (CMOVTrue && CMOVFalse) {
2423       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
2424       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
2425
2426       SDValue True;
2427       SDValue False;
2428       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
2429         True = SelectTrue;
2430         False = SelectFalse;
2431       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
2432         True = SelectFalse;
2433         False = SelectTrue;
2434       }
2435
2436       if (True.getNode() && False.getNode()) {
2437         EVT VT = Cond.getValueType();
2438         SDValue ARMcc = Cond.getOperand(2);
2439         SDValue CCR = Cond.getOperand(3);
2440         SDValue Cmp = Cond.getOperand(4);
2441         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
2442       }
2443     }
2444   }
2445
2446   return DAG.getSelectCC(dl, Cond,
2447                          DAG.getConstant(0, Cond.getValueType()),
2448                          SelectTrue, SelectFalse, ISD::SETNE);
2449 }
2450
2451 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2452   EVT VT = Op.getValueType();
2453   SDValue LHS = Op.getOperand(0);
2454   SDValue RHS = Op.getOperand(1);
2455   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2456   SDValue TrueVal = Op.getOperand(2);
2457   SDValue FalseVal = Op.getOperand(3);
2458   DebugLoc dl = Op.getDebugLoc();
2459
2460   if (LHS.getValueType() == MVT::i32) {
2461     SDValue ARMcc;
2462     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2463     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2464     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
2465   }
2466
2467   ARMCC::CondCodes CondCode, CondCode2;
2468   FPCCToARMCC(CC, CondCode, CondCode2);
2469
2470   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2471   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2472   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2473   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2474                                ARMcc, CCR, Cmp);
2475   if (CondCode2 != ARMCC::AL) {
2476     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
2477     // FIXME: Needs another CMP because flag can have but one use.
2478     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2479     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2480                          Result, TrueVal, ARMcc2, CCR, Cmp2);
2481   }
2482   return Result;
2483 }
2484
2485 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
2486 /// to morph to an integer compare sequence.
2487 static bool canChangeToInt(SDValue Op, bool &SeenZero,
2488                            const ARMSubtarget *Subtarget) {
2489   SDNode *N = Op.getNode();
2490   if (!N->hasOneUse())
2491     // Otherwise it requires moving the value from fp to integer registers.
2492     return false;
2493   if (!N->getNumValues())
2494     return false;
2495   EVT VT = Op.getValueType();
2496   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2497     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
2498     // vmrs are very slow, e.g. cortex-a8.
2499     return false;
2500
2501   if (isFloatingPointZero(Op)) {
2502     SeenZero = true;
2503     return true;
2504   }
2505   return ISD::isNormalLoad(N);
2506 }
2507
2508 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2509   if (isFloatingPointZero(Op))
2510     return DAG.getConstant(0, MVT::i32);
2511
2512   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2513     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2514                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
2515                        Ld->isVolatile(), Ld->isNonTemporal(),
2516                        Ld->getAlignment());
2517
2518   llvm_unreachable("Unknown VFP cmp argument!");
2519 }
2520
2521 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2522                            SDValue &RetVal1, SDValue &RetVal2) {
2523   if (isFloatingPointZero(Op)) {
2524     RetVal1 = DAG.getConstant(0, MVT::i32);
2525     RetVal2 = DAG.getConstant(0, MVT::i32);
2526     return;
2527   }
2528
2529   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2530     SDValue Ptr = Ld->getBasePtr();
2531     RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2532                           Ld->getChain(), Ptr,
2533                           Ld->getPointerInfo(),
2534                           Ld->isVolatile(), Ld->isNonTemporal(),
2535                           Ld->getAlignment());
2536
2537     EVT PtrType = Ptr.getValueType();
2538     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2539     SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2540                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
2541     RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2542                           Ld->getChain(), NewPtr,
2543                           Ld->getPointerInfo().getWithOffset(4),
2544                           Ld->isVolatile(), Ld->isNonTemporal(),
2545                           NewAlign);
2546     return;
2547   }
2548
2549   llvm_unreachable("Unknown VFP cmp argument!");
2550 }
2551
2552 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2553 /// f32 and even f64 comparisons to integer ones.
2554 SDValue
2555 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2556   SDValue Chain = Op.getOperand(0);
2557   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2558   SDValue LHS = Op.getOperand(2);
2559   SDValue RHS = Op.getOperand(3);
2560   SDValue Dest = Op.getOperand(4);
2561   DebugLoc dl = Op.getDebugLoc();
2562
2563   bool SeenZero = false;
2564   if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2565       canChangeToInt(RHS, SeenZero, Subtarget) &&
2566       // If one of the operand is zero, it's safe to ignore the NaN case since
2567       // we only care about equality comparisons.
2568       (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2569     // If unsafe fp math optimization is enabled and there are no othter uses of
2570     // the CMP operands, and the condition code is EQ oe NE, we can optimize it
2571     // to an integer comparison.
2572     if (CC == ISD::SETOEQ)
2573       CC = ISD::SETEQ;
2574     else if (CC == ISD::SETUNE)
2575       CC = ISD::SETNE;
2576
2577     SDValue ARMcc;
2578     if (LHS.getValueType() == MVT::f32) {
2579       LHS = bitcastf32Toi32(LHS, DAG);
2580       RHS = bitcastf32Toi32(RHS, DAG);
2581       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2582       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2583       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2584                          Chain, Dest, ARMcc, CCR, Cmp);
2585     }
2586
2587     SDValue LHS1, LHS2;
2588     SDValue RHS1, RHS2;
2589     expandf64Toi32(LHS, DAG, LHS1, LHS2);
2590     expandf64Toi32(RHS, DAG, RHS1, RHS2);
2591     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2592     ARMcc = DAG.getConstant(CondCode, MVT::i32);
2593     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2594     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2595     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2596   }
2597
2598   return SDValue();
2599 }
2600
2601 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2602   SDValue Chain = Op.getOperand(0);
2603   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2604   SDValue LHS = Op.getOperand(2);
2605   SDValue RHS = Op.getOperand(3);
2606   SDValue Dest = Op.getOperand(4);
2607   DebugLoc dl = Op.getDebugLoc();
2608
2609   if (LHS.getValueType() == MVT::i32) {
2610     SDValue ARMcc;
2611     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2612     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2613     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2614                        Chain, Dest, ARMcc, CCR, Cmp);
2615   }
2616
2617   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2618
2619   if (UnsafeFPMath &&
2620       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2621        CC == ISD::SETNE || CC == ISD::SETUNE)) {
2622     SDValue Result = OptimizeVFPBrcond(Op, DAG);
2623     if (Result.getNode())
2624       return Result;
2625   }
2626
2627   ARMCC::CondCodes CondCode, CondCode2;
2628   FPCCToARMCC(CC, CondCode, CondCode2);
2629
2630   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2631   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2632   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2633   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2634   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
2635   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2636   if (CondCode2 != ARMCC::AL) {
2637     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
2638     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
2639     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2640   }
2641   return Res;
2642 }
2643
2644 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2645   SDValue Chain = Op.getOperand(0);
2646   SDValue Table = Op.getOperand(1);
2647   SDValue Index = Op.getOperand(2);
2648   DebugLoc dl = Op.getDebugLoc();
2649
2650   EVT PTy = getPointerTy();
2651   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2652   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2653   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2654   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2655   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2656   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2657   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2658   if (Subtarget->isThumb2()) {
2659     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2660     // which does another jump to the destination. This also makes it easier
2661     // to translate it to TBB / TBH later.
2662     // FIXME: This might not work if the function is extremely large.
2663     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2664                        Addr, Op.getOperand(2), JTI, UId);
2665   }
2666   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2667     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2668                        MachinePointerInfo::getJumpTable(),
2669                        false, false, 0);
2670     Chain = Addr.getValue(1);
2671     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2672     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2673   } else {
2674     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2675                        MachinePointerInfo::getJumpTable(), false, false, 0);
2676     Chain = Addr.getValue(1);
2677     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2678   }
2679 }
2680
2681 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2682   DebugLoc dl = Op.getDebugLoc();
2683   unsigned Opc;
2684
2685   switch (Op.getOpcode()) {
2686   default:
2687     assert(0 && "Invalid opcode!");
2688   case ISD::FP_TO_SINT:
2689     Opc = ARMISD::FTOSI;
2690     break;
2691   case ISD::FP_TO_UINT:
2692     Opc = ARMISD::FTOUI;
2693     break;
2694   }
2695   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2696   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
2697 }
2698
2699 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2700   EVT VT = Op.getValueType();
2701   DebugLoc dl = Op.getDebugLoc();
2702   unsigned Opc;
2703
2704   switch (Op.getOpcode()) {
2705   default:
2706     assert(0 && "Invalid opcode!");
2707   case ISD::SINT_TO_FP:
2708     Opc = ARMISD::SITOF;
2709     break;
2710   case ISD::UINT_TO_FP:
2711     Opc = ARMISD::UITOF;
2712     break;
2713   }
2714
2715   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
2716   return DAG.getNode(Opc, dl, VT, Op);
2717 }
2718
2719 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
2720   // Implement fcopysign with a fabs and a conditional fneg.
2721   SDValue Tmp0 = Op.getOperand(0);
2722   SDValue Tmp1 = Op.getOperand(1);
2723   DebugLoc dl = Op.getDebugLoc();
2724   EVT VT = Op.getValueType();
2725   EVT SrcVT = Tmp1.getValueType();
2726   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2727   SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
2728   SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
2729   SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
2730   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2731   return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
2732 }
2733
2734 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2735   MachineFunction &MF = DAG.getMachineFunction();
2736   MachineFrameInfo *MFI = MF.getFrameInfo();
2737   MFI->setReturnAddressIsTaken(true);
2738
2739   EVT VT = Op.getValueType();
2740   DebugLoc dl = Op.getDebugLoc();
2741   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2742   if (Depth) {
2743     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2744     SDValue Offset = DAG.getConstant(4, MVT::i32);
2745     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2746                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2747                        MachinePointerInfo(), false, false, 0);
2748   }
2749
2750   // Return LR, which contains the return address. Mark it an implicit live-in.
2751   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
2752   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2753 }
2754
2755 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2756   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2757   MFI->setFrameAddressIsTaken(true);
2758
2759   EVT VT = Op.getValueType();
2760   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2761   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2762   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2763     ? ARM::R7 : ARM::R11;
2764   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2765   while (Depth--)
2766     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
2767                             MachinePointerInfo(),
2768                             false, false, 0);
2769   return FrameAddr;
2770 }
2771
2772 /// ExpandBITCAST - If the target supports VFP, this function is called to
2773 /// expand a bit convert where either the source or destination type is i64 to
2774 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2775 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
2776 /// vectors), since the legalizer won't know what to do with that.
2777 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
2778   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2779   DebugLoc dl = N->getDebugLoc();
2780   SDValue Op = N->getOperand(0);
2781
2782   // This function is only supposed to be called for i64 types, either as the
2783   // source or destination of the bit convert.
2784   EVT SrcVT = Op.getValueType();
2785   EVT DstVT = N->getValueType(0);
2786   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2787          "ExpandBITCAST called for non-i64 type");
2788
2789   // Turn i64->f64 into VMOVDRR.
2790   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2791     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2792                              DAG.getConstant(0, MVT::i32));
2793     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2794                              DAG.getConstant(1, MVT::i32));
2795     return DAG.getNode(ISD::BITCAST, dl, DstVT,
2796                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
2797   }
2798
2799   // Turn f64->i64 into VMOVRRD.
2800   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2801     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2802                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2803     // Merge the pieces into a single i64 value.
2804     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2805   }
2806
2807   return SDValue();
2808 }
2809
2810 /// getZeroVector - Returns a vector of specified type with all zero elements.
2811 /// Zero vectors are used to represent vector negation and in those cases
2812 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
2813 /// not support i64 elements, so sometimes the zero vectors will need to be
2814 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
2815 /// zero vector.
2816 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2817   assert(VT.isVector() && "Expected a vector type");
2818   // The canonical modified immediate encoding of a zero vector is....0!
2819   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
2820   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
2821   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
2822   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
2823 }
2824
2825 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
2826 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2827 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2828                                                 SelectionDAG &DAG) const {
2829   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2830   EVT VT = Op.getValueType();
2831   unsigned VTBits = VT.getSizeInBits();
2832   DebugLoc dl = Op.getDebugLoc();
2833   SDValue ShOpLo = Op.getOperand(0);
2834   SDValue ShOpHi = Op.getOperand(1);
2835   SDValue ShAmt  = Op.getOperand(2);
2836   SDValue ARMcc;
2837   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2838
2839   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2840
2841   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2842                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
2843   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2844   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2845                                    DAG.getConstant(VTBits, MVT::i32));
2846   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2847   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2848   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2849
2850   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2851   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2852                           ARMcc, DAG, dl);
2853   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2854   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
2855                            CCR, Cmp);
2856
2857   SDValue Ops[2] = { Lo, Hi };
2858   return DAG.getMergeValues(Ops, 2, dl);
2859 }
2860
2861 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2862 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2863 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2864                                                SelectionDAG &DAG) const {
2865   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2866   EVT VT = Op.getValueType();
2867   unsigned VTBits = VT.getSizeInBits();
2868   DebugLoc dl = Op.getDebugLoc();
2869   SDValue ShOpLo = Op.getOperand(0);
2870   SDValue ShOpHi = Op.getOperand(1);
2871   SDValue ShAmt  = Op.getOperand(2);
2872   SDValue ARMcc;
2873
2874   assert(Op.getOpcode() == ISD::SHL_PARTS);
2875   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2876                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
2877   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2878   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2879                                    DAG.getConstant(VTBits, MVT::i32));
2880   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2881   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2882
2883   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2884   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2885   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2886                           ARMcc, DAG, dl);
2887   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2888   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
2889                            CCR, Cmp);
2890
2891   SDValue Ops[2] = { Lo, Hi };
2892   return DAG.getMergeValues(Ops, 2, dl);
2893 }
2894
2895 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2896                                             SelectionDAG &DAG) const {
2897   // The rounding mode is in bits 23:22 of the FPSCR.
2898   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2899   // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
2900   // so that the shift + and get folded into a bitfield extract.
2901   DebugLoc dl = Op.getDebugLoc();
2902   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
2903                               DAG.getConstant(Intrinsic::arm_get_fpscr,
2904                                               MVT::i32));
2905   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
2906                                   DAG.getConstant(1U << 22, MVT::i32));
2907   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2908                               DAG.getConstant(22, MVT::i32));
2909   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2910                      DAG.getConstant(3, MVT::i32));
2911 }
2912
2913 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2914                          const ARMSubtarget *ST) {
2915   EVT VT = N->getValueType(0);
2916   DebugLoc dl = N->getDebugLoc();
2917
2918   if (!ST->hasV6T2Ops())
2919     return SDValue();
2920
2921   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2922   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2923 }
2924
2925 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2926                           const ARMSubtarget *ST) {
2927   EVT VT = N->getValueType(0);
2928   DebugLoc dl = N->getDebugLoc();
2929
2930   if (!VT.isVector())
2931     return SDValue();
2932
2933   // Lower vector shifts on NEON to use VSHL.
2934   assert(ST->hasNEON() && "unexpected vector shift");
2935
2936   // Left shifts translate directly to the vshiftu intrinsic.
2937   if (N->getOpcode() == ISD::SHL)
2938     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2939                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2940                        N->getOperand(0), N->getOperand(1));
2941
2942   assert((N->getOpcode() == ISD::SRA ||
2943           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2944
2945   // NEON uses the same intrinsics for both left and right shifts.  For
2946   // right shifts, the shift amounts are negative, so negate the vector of
2947   // shift amounts.
2948   EVT ShiftVT = N->getOperand(1).getValueType();
2949   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2950                                      getZeroVector(ShiftVT, DAG, dl),
2951                                      N->getOperand(1));
2952   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2953                              Intrinsic::arm_neon_vshifts :
2954                              Intrinsic::arm_neon_vshiftu);
2955   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2956                      DAG.getConstant(vshiftInt, MVT::i32),
2957                      N->getOperand(0), NegatedCount);
2958 }
2959
2960 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
2961                                 const ARMSubtarget *ST) {
2962   EVT VT = N->getValueType(0);
2963   DebugLoc dl = N->getDebugLoc();
2964
2965   // We can get here for a node like i32 = ISD::SHL i32, i64
2966   if (VT != MVT::i64)
2967     return SDValue();
2968
2969   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2970          "Unknown shift to lower!");
2971
2972   // We only lower SRA, SRL of 1 here, all others use generic lowering.
2973   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2974       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2975     return SDValue();
2976
2977   // If we are in thumb mode, we don't have RRX.
2978   if (ST->isThumb1Only()) return SDValue();
2979
2980   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
2981   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2982                            DAG.getConstant(0, MVT::i32));
2983   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2984                            DAG.getConstant(1, MVT::i32));
2985
2986   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2987   // captures the result into a carry flag.
2988   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2989   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2990
2991   // The low part is an ARMISD::RRX operand, which shifts the carry in.
2992   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2993
2994   // Merge the pieces into a single i64 value.
2995  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2996 }
2997
2998 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2999   SDValue TmpOp0, TmpOp1;
3000   bool Invert = false;
3001   bool Swap = false;
3002   unsigned Opc = 0;
3003
3004   SDValue Op0 = Op.getOperand(0);
3005   SDValue Op1 = Op.getOperand(1);
3006   SDValue CC = Op.getOperand(2);
3007   EVT VT = Op.getValueType();
3008   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3009   DebugLoc dl = Op.getDebugLoc();
3010
3011   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
3012     switch (SetCCOpcode) {
3013     default: llvm_unreachable("Illegal FP comparison"); break;
3014     case ISD::SETUNE:
3015     case ISD::SETNE:  Invert = true; // Fallthrough
3016     case ISD::SETOEQ:
3017     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3018     case ISD::SETOLT:
3019     case ISD::SETLT: Swap = true; // Fallthrough
3020     case ISD::SETOGT:
3021     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3022     case ISD::SETOLE:
3023     case ISD::SETLE:  Swap = true; // Fallthrough
3024     case ISD::SETOGE:
3025     case ISD::SETGE: Opc = ARMISD::VCGE; break;
3026     case ISD::SETUGE: Swap = true; // Fallthrough
3027     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
3028     case ISD::SETUGT: Swap = true; // Fallthrough
3029     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
3030     case ISD::SETUEQ: Invert = true; // Fallthrough
3031     case ISD::SETONE:
3032       // Expand this to (OLT | OGT).
3033       TmpOp0 = Op0;
3034       TmpOp1 = Op1;
3035       Opc = ISD::OR;
3036       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3037       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
3038       break;
3039     case ISD::SETUO: Invert = true; // Fallthrough
3040     case ISD::SETO:
3041       // Expand this to (OLT | OGE).
3042       TmpOp0 = Op0;
3043       TmpOp1 = Op1;
3044       Opc = ISD::OR;
3045       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3046       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
3047       break;
3048     }
3049   } else {
3050     // Integer comparisons.
3051     switch (SetCCOpcode) {
3052     default: llvm_unreachable("Illegal integer comparison"); break;
3053     case ISD::SETNE:  Invert = true;
3054     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3055     case ISD::SETLT:  Swap = true;
3056     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3057     case ISD::SETLE:  Swap = true;
3058     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
3059     case ISD::SETULT: Swap = true;
3060     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
3061     case ISD::SETULE: Swap = true;
3062     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
3063     }
3064
3065     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
3066     if (Opc == ARMISD::VCEQ) {
3067
3068       SDValue AndOp;
3069       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3070         AndOp = Op0;
3071       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
3072         AndOp = Op1;
3073
3074       // Ignore bitconvert.
3075       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
3076         AndOp = AndOp.getOperand(0);
3077
3078       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3079         Opc = ARMISD::VTST;
3080         Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
3081         Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
3082         Invert = !Invert;
3083       }
3084     }
3085   }
3086
3087   if (Swap)
3088     std::swap(Op0, Op1);
3089
3090   // If one of the operands is a constant vector zero, attempt to fold the
3091   // comparison to a specialized compare-against-zero form.
3092   SDValue SingleOp;
3093   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3094     SingleOp = Op0;
3095   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
3096     if (Opc == ARMISD::VCGE)
3097       Opc = ARMISD::VCLEZ;
3098     else if (Opc == ARMISD::VCGT)
3099       Opc = ARMISD::VCLTZ;
3100     SingleOp = Op1;
3101   }
3102
3103   SDValue Result;
3104   if (SingleOp.getNode()) {
3105     switch (Opc) {
3106     case ARMISD::VCEQ:
3107       Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
3108     case ARMISD::VCGE:
3109       Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
3110     case ARMISD::VCLEZ:
3111       Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
3112     case ARMISD::VCGT:
3113       Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
3114     case ARMISD::VCLTZ:
3115       Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
3116     default:
3117       Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3118     }
3119   } else {
3120      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3121   }
3122
3123   if (Invert)
3124     Result = DAG.getNOT(dl, Result, VT);
3125
3126   return Result;
3127 }
3128
3129 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
3130 /// valid vector constant for a NEON instruction with a "modified immediate"
3131 /// operand (e.g., VMOV).  If so, return the encoded value.
3132 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3133                                  unsigned SplatBitSize, SelectionDAG &DAG,
3134                                  EVT &VT, bool is128Bits, NEONModImmType type) {
3135   unsigned OpCmode, Imm;
3136
3137   // SplatBitSize is set to the smallest size that splats the vector, so a
3138   // zero vector will always have SplatBitSize == 8.  However, NEON modified
3139   // immediate instructions others than VMOV do not support the 8-bit encoding
3140   // of a zero vector, and the default encoding of zero is supposed to be the
3141   // 32-bit version.
3142   if (SplatBits == 0)
3143     SplatBitSize = 32;
3144
3145   switch (SplatBitSize) {
3146   case 8:
3147     if (type != VMOVModImm)
3148       return SDValue();
3149     // Any 1-byte value is OK.  Op=0, Cmode=1110.
3150     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3151     OpCmode = 0xe;
3152     Imm = SplatBits;
3153     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3154     break;
3155
3156   case 16:
3157     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3158     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3159     if ((SplatBits & ~0xff) == 0) {
3160       // Value = 0x00nn: Op=x, Cmode=100x.
3161       OpCmode = 0x8;
3162       Imm = SplatBits;
3163       break;
3164     }
3165     if ((SplatBits & ~0xff00) == 0) {
3166       // Value = 0xnn00: Op=x, Cmode=101x.
3167       OpCmode = 0xa;
3168       Imm = SplatBits >> 8;
3169       break;
3170     }
3171     return SDValue();
3172
3173   case 32:
3174     // NEON's 32-bit VMOV supports splat values where:
3175     // * only one byte is nonzero, or
3176     // * the least significant byte is 0xff and the second byte is nonzero, or
3177     // * the least significant 2 bytes are 0xff and the third is nonzero.
3178     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3179     if ((SplatBits & ~0xff) == 0) {
3180       // Value = 0x000000nn: Op=x, Cmode=000x.
3181       OpCmode = 0;
3182       Imm = SplatBits;
3183       break;
3184     }
3185     if ((SplatBits & ~0xff00) == 0) {
3186       // Value = 0x0000nn00: Op=x, Cmode=001x.
3187       OpCmode = 0x2;
3188       Imm = SplatBits >> 8;
3189       break;
3190     }
3191     if ((SplatBits & ~0xff0000) == 0) {
3192       // Value = 0x00nn0000: Op=x, Cmode=010x.
3193       OpCmode = 0x4;
3194       Imm = SplatBits >> 16;
3195       break;
3196     }
3197     if ((SplatBits & ~0xff000000) == 0) {
3198       // Value = 0xnn000000: Op=x, Cmode=011x.
3199       OpCmode = 0x6;
3200       Imm = SplatBits >> 24;
3201       break;
3202     }
3203
3204     // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
3205     if (type == OtherModImm) return SDValue();
3206
3207     if ((SplatBits & ~0xffff) == 0 &&
3208         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3209       // Value = 0x0000nnff: Op=x, Cmode=1100.
3210       OpCmode = 0xc;
3211       Imm = SplatBits >> 8;
3212       SplatBits |= 0xff;
3213       break;
3214     }
3215
3216     if ((SplatBits & ~0xffffff) == 0 &&
3217         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3218       // Value = 0x00nnffff: Op=x, Cmode=1101.
3219       OpCmode = 0xd;
3220       Imm = SplatBits >> 16;
3221       SplatBits |= 0xffff;
3222       break;
3223     }
3224
3225     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3226     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3227     // VMOV.I32.  A (very) minor optimization would be to replicate the value
3228     // and fall through here to test for a valid 64-bit splat.  But, then the
3229     // caller would also need to check and handle the change in size.
3230     return SDValue();
3231
3232   case 64: {
3233     if (type != VMOVModImm)
3234       return SDValue();
3235     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3236     uint64_t BitMask = 0xff;
3237     uint64_t Val = 0;
3238     unsigned ImmMask = 1;
3239     Imm = 0;
3240     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3241       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3242         Val |= BitMask;
3243         Imm |= ImmMask;
3244       } else if ((SplatBits & BitMask) != 0) {
3245         return SDValue();
3246       }
3247       BitMask <<= 8;
3248       ImmMask <<= 1;
3249     }
3250     // Op=1, Cmode=1110.
3251     OpCmode = 0x1e;
3252     SplatBits = Val;
3253     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3254     break;
3255   }
3256
3257   default:
3258     llvm_unreachable("unexpected size for isNEONModifiedImm");
3259     return SDValue();
3260   }
3261
3262   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3263   return DAG.getTargetConstant(EncodedVal, MVT::i32);
3264 }
3265
3266 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
3267                        bool &ReverseVEXT, unsigned &Imm) {
3268   unsigned NumElts = VT.getVectorNumElements();
3269   ReverseVEXT = false;
3270
3271   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3272   if (M[0] < 0)
3273     return false;
3274
3275   Imm = M[0];
3276
3277   // If this is a VEXT shuffle, the immediate value is the index of the first
3278   // element.  The other shuffle indices must be the successive elements after
3279   // the first one.
3280   unsigned ExpectedElt = Imm;
3281   for (unsigned i = 1; i < NumElts; ++i) {
3282     // Increment the expected index.  If it wraps around, it may still be
3283     // a VEXT but the source vectors must be swapped.
3284     ExpectedElt += 1;
3285     if (ExpectedElt == NumElts * 2) {
3286       ExpectedElt = 0;
3287       ReverseVEXT = true;
3288     }
3289
3290     if (M[i] < 0) continue; // ignore UNDEF indices
3291     if (ExpectedElt != static_cast<unsigned>(M[i]))
3292       return false;
3293   }
3294
3295   // Adjust the index value if the source operands will be swapped.
3296   if (ReverseVEXT)
3297     Imm -= NumElts;
3298
3299   return true;
3300 }
3301
3302 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
3303 /// instruction with the specified blocksize.  (The order of the elements
3304 /// within each block of the vector is reversed.)
3305 static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
3306                        unsigned BlockSize) {
3307   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3308          "Only possible block sizes for VREV are: 16, 32, 64");
3309
3310   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3311   if (EltSz == 64)
3312     return false;
3313
3314   unsigned NumElts = VT.getVectorNumElements();
3315   unsigned BlockElts = M[0] + 1;
3316   // If the first shuffle index is UNDEF, be optimistic.
3317   if (M[0] < 0)
3318     BlockElts = BlockSize / EltSz;
3319
3320   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3321     return false;
3322
3323   for (unsigned i = 0; i < NumElts; ++i) {
3324     if (M[i] < 0) continue; // ignore UNDEF indices
3325     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3326       return false;
3327   }
3328
3329   return true;
3330 }
3331
3332 static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
3333                        unsigned &WhichResult) {
3334   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3335   if (EltSz == 64)
3336     return false;
3337
3338   unsigned NumElts = VT.getVectorNumElements();
3339   WhichResult = (M[0] == 0 ? 0 : 1);
3340   for (unsigned i = 0; i < NumElts; i += 2) {
3341     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3342         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3343       return false;
3344   }
3345   return true;
3346 }
3347
3348 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3349 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3350 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3351 static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3352                                 unsigned &WhichResult) {
3353   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3354   if (EltSz == 64)
3355     return false;
3356
3357   unsigned NumElts = VT.getVectorNumElements();
3358   WhichResult = (M[0] == 0 ? 0 : 1);
3359   for (unsigned i = 0; i < NumElts; i += 2) {
3360     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3361         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
3362       return false;
3363   }
3364   return true;
3365 }
3366
3367 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3368                        unsigned &WhichResult) {
3369   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3370   if (EltSz == 64)
3371     return false;
3372
3373   unsigned NumElts = VT.getVectorNumElements();
3374   WhichResult = (M[0] == 0 ? 0 : 1);
3375   for (unsigned i = 0; i != NumElts; ++i) {
3376     if (M[i] < 0) continue; // ignore UNDEF indices
3377     if ((unsigned) M[i] != 2 * i + WhichResult)
3378       return false;
3379   }
3380
3381   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3382   if (VT.is64BitVector() && EltSz == 32)
3383     return false;
3384
3385   return true;
3386 }
3387
3388 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3389 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3390 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
3391 static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3392                                 unsigned &WhichResult) {
3393   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3394   if (EltSz == 64)
3395     return false;
3396
3397   unsigned Half = VT.getVectorNumElements() / 2;
3398   WhichResult = (M[0] == 0 ? 0 : 1);
3399   for (unsigned j = 0; j != 2; ++j) {
3400     unsigned Idx = WhichResult;
3401     for (unsigned i = 0; i != Half; ++i) {
3402       int MIdx = M[i + j * Half];
3403       if (MIdx >= 0 && (unsigned) MIdx != Idx)
3404         return false;
3405       Idx += 2;
3406     }
3407   }
3408
3409   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3410   if (VT.is64BitVector() && EltSz == 32)
3411     return false;
3412
3413   return true;
3414 }
3415
3416 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
3417                        unsigned &WhichResult) {
3418   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3419   if (EltSz == 64)
3420     return false;
3421
3422   unsigned NumElts = VT.getVectorNumElements();
3423   WhichResult = (M[0] == 0 ? 0 : 1);
3424   unsigned Idx = WhichResult * NumElts / 2;
3425   for (unsigned i = 0; i != NumElts; i += 2) {
3426     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3427         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
3428       return false;
3429     Idx += 1;
3430   }
3431
3432   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3433   if (VT.is64BitVector() && EltSz == 32)
3434     return false;
3435
3436   return true;
3437 }
3438
3439 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3440 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3441 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3442 static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3443                                 unsigned &WhichResult) {
3444   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3445   if (EltSz == 64)
3446     return false;
3447
3448   unsigned NumElts = VT.getVectorNumElements();
3449   WhichResult = (M[0] == 0 ? 0 : 1);
3450   unsigned Idx = WhichResult * NumElts / 2;
3451   for (unsigned i = 0; i != NumElts; i += 2) {
3452     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3453         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
3454       return false;
3455     Idx += 1;
3456   }
3457
3458   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3459   if (VT.is64BitVector() && EltSz == 32)
3460     return false;
3461
3462   return true;
3463 }
3464
3465 // If N is an integer constant that can be moved into a register in one
3466 // instruction, return an SDValue of such a constant (will become a MOV
3467 // instruction).  Otherwise return null.
3468 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
3469                                      const ARMSubtarget *ST, DebugLoc dl) {
3470   uint64_t Val;
3471   if (!isa<ConstantSDNode>(N))
3472     return SDValue();
3473   Val = cast<ConstantSDNode>(N)->getZExtValue();
3474
3475   if (ST->isThumb1Only()) {
3476     if (Val <= 255 || ~Val <= 255)
3477       return DAG.getConstant(Val, MVT::i32);
3478   } else {
3479     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
3480       return DAG.getConstant(Val, MVT::i32);
3481   }
3482   return SDValue();
3483 }
3484
3485 // If this is a case we can't handle, return null and let the default
3486 // expansion code take care of it.
3487 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3488                                  const ARMSubtarget *ST) {
3489   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3490   DebugLoc dl = Op.getDebugLoc();
3491   EVT VT = Op.getValueType();
3492
3493   APInt SplatBits, SplatUndef;
3494   unsigned SplatBitSize;
3495   bool HasAnyUndefs;
3496   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3497     if (SplatBitSize <= 64) {
3498       // Check if an immediate VMOV works.
3499       EVT VmovVT;
3500       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
3501                                       SplatUndef.getZExtValue(), SplatBitSize,
3502                                       DAG, VmovVT, VT.is128BitVector(),
3503                                       VMOVModImm);
3504       if (Val.getNode()) {
3505         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
3506         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3507       }
3508
3509       // Try an immediate VMVN.
3510       uint64_t NegatedImm = (SplatBits.getZExtValue() ^
3511                              ((1LL << SplatBitSize) - 1));
3512       Val = isNEONModifiedImm(NegatedImm,
3513                                       SplatUndef.getZExtValue(), SplatBitSize,
3514                                       DAG, VmovVT, VT.is128BitVector(),
3515                                       VMVNModImm);
3516       if (Val.getNode()) {
3517         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
3518         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3519       }
3520     }
3521   }
3522
3523   // Scan through the operands to see if only one value is used.
3524   unsigned NumElts = VT.getVectorNumElements();
3525   bool isOnlyLowElement = true;
3526   bool usesOnlyOneValue = true;
3527   bool isConstant = true;
3528   SDValue Value;
3529   for (unsigned i = 0; i < NumElts; ++i) {
3530     SDValue V = Op.getOperand(i);
3531     if (V.getOpcode() == ISD::UNDEF)
3532       continue;
3533     if (i > 0)
3534       isOnlyLowElement = false;
3535     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3536       isConstant = false;
3537
3538     if (!Value.getNode())
3539       Value = V;
3540     else if (V != Value)
3541       usesOnlyOneValue = false;
3542   }
3543
3544   if (!Value.getNode())
3545     return DAG.getUNDEF(VT);
3546
3547   if (isOnlyLowElement)
3548     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3549
3550   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3551
3552   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
3553   // i32 and try again.
3554   if (usesOnlyOneValue && EltSize <= 32) {
3555     if (!isConstant)
3556       return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3557     if (VT.getVectorElementType().isFloatingPoint()) {
3558       SmallVector<SDValue, 8> Ops;
3559       for (unsigned i = 0; i < NumElts; ++i)
3560         Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
3561                                   Op.getOperand(i)));
3562       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
3563       SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
3564       Val = LowerBUILD_VECTOR(Val, DAG, ST);
3565       if (Val.getNode())
3566         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
3567     }
3568     SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
3569     if (Val.getNode())
3570       return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
3571   }
3572
3573   // If all elements are constants and the case above didn't get hit, fall back
3574   // to the default expansion, which will generate a load from the constant
3575   // pool.
3576   if (isConstant)
3577     return SDValue();
3578
3579   // Vectors with 32- or 64-bit elements can be built by directly assigning
3580   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
3581   // will be legalized.
3582   if (EltSize >= 32) {
3583     // Do the expansion with floating-point types, since that is what the VFP
3584     // registers are defined to use, and since i64 is not legal.
3585     EVT EltVT = EVT::getFloatingPointVT(EltSize);
3586     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3587     SmallVector<SDValue, 8> Ops;
3588     for (unsigned i = 0; i < NumElts; ++i)
3589       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
3590     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3591     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
3592   }
3593
3594   return SDValue();
3595 }
3596
3597 /// isShuffleMaskLegal - Targets can use this to indicate that they only
3598 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3599 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3600 /// are assumed to be legal.
3601 bool
3602 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
3603                                       EVT VT) const {
3604   if (VT.getVectorNumElements() == 4 &&
3605       (VT.is128BitVector() || VT.is64BitVector())) {
3606     unsigned PFIndexes[4];
3607     for (unsigned i = 0; i != 4; ++i) {
3608       if (M[i] < 0)
3609         PFIndexes[i] = 8;
3610       else
3611         PFIndexes[i] = M[i];
3612     }
3613
3614     // Compute the index in the perfect shuffle table.
3615     unsigned PFTableIndex =
3616       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3617     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3618     unsigned Cost = (PFEntry >> 30);
3619
3620     if (Cost <= 4)
3621       return true;
3622   }
3623
3624   bool ReverseVEXT;
3625   unsigned Imm, WhichResult;
3626
3627   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3628   return (EltSize >= 32 ||
3629           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3630           isVREVMask(M, VT, 64) ||
3631           isVREVMask(M, VT, 32) ||
3632           isVREVMask(M, VT, 16) ||
3633           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
3634           isVTRNMask(M, VT, WhichResult) ||
3635           isVUZPMask(M, VT, WhichResult) ||
3636           isVZIPMask(M, VT, WhichResult) ||
3637           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
3638           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
3639           isVZIP_v_undef_Mask(M, VT, WhichResult));
3640 }
3641
3642 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3643 /// the specified operations to build the shuffle.
3644 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3645                                       SDValue RHS, SelectionDAG &DAG,
3646                                       DebugLoc dl) {
3647   unsigned OpNum = (PFEntry >> 26) & 0x0F;
3648   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3649   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3650
3651   enum {
3652     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3653     OP_VREV,
3654     OP_VDUP0,
3655     OP_VDUP1,
3656     OP_VDUP2,
3657     OP_VDUP3,
3658     OP_VEXT1,
3659     OP_VEXT2,
3660     OP_VEXT3,
3661     OP_VUZPL, // VUZP, left result
3662     OP_VUZPR, // VUZP, right result
3663     OP_VZIPL, // VZIP, left result
3664     OP_VZIPR, // VZIP, right result
3665     OP_VTRNL, // VTRN, left result
3666     OP_VTRNR  // VTRN, right result
3667   };
3668
3669   if (OpNum == OP_COPY) {
3670     if (LHSID == (1*9+2)*9+3) return LHS;
3671     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3672     return RHS;
3673   }
3674
3675   SDValue OpLHS, OpRHS;
3676   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3677   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3678   EVT VT = OpLHS.getValueType();
3679
3680   switch (OpNum) {
3681   default: llvm_unreachable("Unknown shuffle opcode!");
3682   case OP_VREV:
3683     return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
3684   case OP_VDUP0:
3685   case OP_VDUP1:
3686   case OP_VDUP2:
3687   case OP_VDUP3:
3688     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
3689                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
3690   case OP_VEXT1:
3691   case OP_VEXT2:
3692   case OP_VEXT3:
3693     return DAG.getNode(ARMISD::VEXT, dl, VT,
3694                        OpLHS, OpRHS,
3695                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3696   case OP_VUZPL:
3697   case OP_VUZPR:
3698     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3699                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3700   case OP_VZIPL:
3701   case OP_VZIPR:
3702     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3703                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3704   case OP_VTRNL:
3705   case OP_VTRNR:
3706     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3707                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3708   }
3709 }
3710
3711 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3712   SDValue V1 = Op.getOperand(0);
3713   SDValue V2 = Op.getOperand(1);
3714   DebugLoc dl = Op.getDebugLoc();
3715   EVT VT = Op.getValueType();
3716   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3717   SmallVector<int, 8> ShuffleMask;
3718
3719   // Convert shuffles that are directly supported on NEON to target-specific
3720   // DAG nodes, instead of keeping them as shuffles and matching them again
3721   // during code selection.  This is more efficient and avoids the possibility
3722   // of inconsistencies between legalization and selection.
3723   // FIXME: floating-point vectors should be canonicalized to integer vectors
3724   // of the same time so that they get CSEd properly.
3725   SVN->getMask(ShuffleMask);
3726
3727   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3728   if (EltSize <= 32) {
3729     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3730       int Lane = SVN->getSplatIndex();
3731       // If this is undef splat, generate it via "just" vdup, if possible.
3732       if (Lane == -1) Lane = 0;
3733
3734       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3735         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3736       }
3737       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3738                          DAG.getConstant(Lane, MVT::i32));
3739     }
3740
3741     bool ReverseVEXT;
3742     unsigned Imm;
3743     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3744       if (ReverseVEXT)
3745         std::swap(V1, V2);
3746       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3747                          DAG.getConstant(Imm, MVT::i32));
3748     }
3749
3750     if (isVREVMask(ShuffleMask, VT, 64))
3751       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3752     if (isVREVMask(ShuffleMask, VT, 32))
3753       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3754     if (isVREVMask(ShuffleMask, VT, 16))
3755       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3756
3757     // Check for Neon shuffles that modify both input vectors in place.
3758     // If both results are used, i.e., if there are two shuffles with the same
3759     // source operands and with masks corresponding to both results of one of
3760     // these operations, DAG memoization will ensure that a single node is
3761     // used for both shuffles.
3762     unsigned WhichResult;
3763     if (isVTRNMask(ShuffleMask, VT, WhichResult))
3764       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3765                          V1, V2).getValue(WhichResult);
3766     if (isVUZPMask(ShuffleMask, VT, WhichResult))
3767       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3768                          V1, V2).getValue(WhichResult);
3769     if (isVZIPMask(ShuffleMask, VT, WhichResult))
3770       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3771                          V1, V2).getValue(WhichResult);
3772
3773     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3774       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3775                          V1, V1).getValue(WhichResult);
3776     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3777       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3778                          V1, V1).getValue(WhichResult);
3779     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3780       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3781                          V1, V1).getValue(WhichResult);
3782   }
3783
3784   // If the shuffle is not directly supported and it has 4 elements, use
3785   // the PerfectShuffle-generated table to synthesize it from other shuffles.
3786   unsigned NumElts = VT.getVectorNumElements();
3787   if (NumElts == 4) {
3788     unsigned PFIndexes[4];
3789     for (unsigned i = 0; i != 4; ++i) {
3790       if (ShuffleMask[i] < 0)
3791         PFIndexes[i] = 8;
3792       else
3793         PFIndexes[i] = ShuffleMask[i];
3794     }
3795
3796     // Compute the index in the perfect shuffle table.
3797     unsigned PFTableIndex =
3798       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3799     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3800     unsigned Cost = (PFEntry >> 30);
3801
3802     if (Cost <= 4)
3803       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3804   }
3805
3806   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
3807   if (EltSize >= 32) {
3808     // Do the expansion with floating-point types, since that is what the VFP
3809     // registers are defined to use, and since i64 is not legal.
3810     EVT EltVT = EVT::getFloatingPointVT(EltSize);
3811     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3812     V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
3813     V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
3814     SmallVector<SDValue, 8> Ops;
3815     for (unsigned i = 0; i < NumElts; ++i) {
3816       if (ShuffleMask[i] < 0)
3817         Ops.push_back(DAG.getUNDEF(EltVT));
3818       else
3819         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3820                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
3821                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3822                                                   MVT::i32)));
3823     }
3824     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3825     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
3826   }
3827
3828   return SDValue();
3829 }
3830
3831 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3832   // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
3833   SDValue Lane = Op.getOperand(1);
3834   if (!isa<ConstantSDNode>(Lane))
3835     return SDValue();
3836
3837   SDValue Vec = Op.getOperand(0);
3838   if (Op.getValueType() == MVT::i32 &&
3839       Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
3840     DebugLoc dl = Op.getDebugLoc();
3841     return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3842   }
3843
3844   return Op;
3845 }
3846
3847 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3848   // The only time a CONCAT_VECTORS operation can have legal types is when
3849   // two 64-bit vectors are concatenated to a 128-bit vector.
3850   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3851          "unexpected CONCAT_VECTORS");
3852   DebugLoc dl = Op.getDebugLoc();
3853   SDValue Val = DAG.getUNDEF(MVT::v2f64);
3854   SDValue Op0 = Op.getOperand(0);
3855   SDValue Op1 = Op.getOperand(1);
3856   if (Op0.getOpcode() != ISD::UNDEF)
3857     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3858                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
3859                       DAG.getIntPtrConstant(0));
3860   if (Op1.getOpcode() != ISD::UNDEF)
3861     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3862                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
3863                       DAG.getIntPtrConstant(1));
3864   return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
3865 }
3866
3867 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
3868 /// element has been zero/sign-extended, depending on the isSigned parameter,
3869 /// from an integer type half its size.
3870 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3871                                    bool isSigned) {
3872   // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
3873   EVT VT = N->getValueType(0);
3874   if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
3875     SDNode *BVN = N->getOperand(0).getNode();
3876     if (BVN->getValueType(0) != MVT::v4i32 ||
3877         BVN->getOpcode() != ISD::BUILD_VECTOR)
3878       return false;
3879     unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
3880     unsigned HiElt = 1 - LoElt;
3881     ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
3882     ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
3883     ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
3884     ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
3885     if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
3886       return false;
3887     if (isSigned) {
3888       if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
3889           Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
3890         return true;
3891     } else {
3892       if (Hi0->isNullValue() && Hi1->isNullValue())
3893         return true;
3894     }
3895     return false;
3896   }
3897
3898   if (N->getOpcode() != ISD::BUILD_VECTOR)
3899     return false;
3900
3901   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
3902     SDNode *Elt = N->getOperand(i).getNode();
3903     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3904       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3905       unsigned HalfSize = EltSize / 2;
3906       if (isSigned) {
3907         int64_t SExtVal = C->getSExtValue();
3908         if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
3909           return false;
3910       } else {
3911         if ((C->getZExtValue() >> HalfSize) != 0)
3912           return false;
3913       }
3914       continue;
3915     }
3916     return false;
3917   }
3918
3919   return true;
3920 }
3921
3922 /// isSignExtended - Check if a node is a vector value that is sign-extended
3923 /// or a constant BUILD_VECTOR with sign-extended elements.
3924 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3925   if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
3926     return true;
3927   if (isExtendedBUILD_VECTOR(N, DAG, true))
3928     return true;
3929   return false;
3930 }
3931
3932 /// isZeroExtended - Check if a node is a vector value that is zero-extended
3933 /// or a constant BUILD_VECTOR with zero-extended elements.
3934 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3935   if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
3936     return true;
3937   if (isExtendedBUILD_VECTOR(N, DAG, false))
3938     return true;
3939   return false;
3940 }
3941
3942 /// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
3943 /// load, or BUILD_VECTOR with extended elements, return the unextended value.
3944 static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
3945   if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
3946     return N->getOperand(0);
3947   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
3948     return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
3949                        LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
3950                        LD->isNonTemporal(), LD->getAlignment());
3951   // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
3952   // have been legalized as a BITCAST from v4i32.
3953   if (N->getOpcode() == ISD::BITCAST) {
3954     SDNode *BVN = N->getOperand(0).getNode();
3955     assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
3956            BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
3957     unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
3958     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
3959                        BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
3960   }
3961   // Construct a new BUILD_VECTOR with elements truncated to half the size.
3962   assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3963   EVT VT = N->getValueType(0);
3964   unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
3965   unsigned NumElts = VT.getVectorNumElements();
3966   MVT TruncVT = MVT::getIntegerVT(EltSize);
3967   SmallVector<SDValue, 8> Ops;
3968   for (unsigned i = 0; i != NumElts; ++i) {
3969     ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3970     const APInt &CInt = C->getAPIntValue();
3971     Ops.push_back(DAG.getConstant(APInt(CInt).trunc(EltSize), TruncVT));
3972   }
3973   return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
3974                      MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
3975 }
3976
3977 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
3978   // Multiplications are only custom-lowered for 128-bit vectors so that
3979   // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
3980   EVT VT = Op.getValueType();
3981   assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
3982   SDNode *N0 = Op.getOperand(0).getNode();
3983   SDNode *N1 = Op.getOperand(1).getNode();
3984   unsigned NewOpc = 0;
3985   if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
3986     NewOpc = ARMISD::VMULLs;
3987   else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
3988     NewOpc = ARMISD::VMULLu;
3989   else if (VT == MVT::v2i64)
3990     // Fall through to expand this.  It is not legal.
3991     return SDValue();
3992   else
3993     // Other vector multiplications are legal.
3994     return Op;
3995
3996   // Legalize to a VMULL instruction.
3997   DebugLoc DL = Op.getDebugLoc();
3998   SDValue Op0 = SkipExtension(N0, DAG);
3999   SDValue Op1 = SkipExtension(N1, DAG);
4000
4001   assert(Op0.getValueType().is64BitVector() &&
4002          Op1.getValueType().is64BitVector() &&
4003          "unexpected types for extended operands to VMULL");
4004   return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
4005 }
4006
4007 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
4008   switch (Op.getOpcode()) {
4009   default: llvm_unreachable("Don't know how to custom lower this!");
4010   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
4011   case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
4012   case ISD::GlobalAddress:
4013     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
4014       LowerGlobalAddressELF(Op, DAG);
4015   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
4016   case ISD::SELECT:        return LowerSELECT(Op, DAG);
4017   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
4018   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
4019   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
4020   case ISD::VASTART:       return LowerVASTART(Op, DAG);
4021   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
4022   case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
4023   case ISD::SINT_TO_FP:
4024   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
4025   case ISD::FP_TO_SINT:
4026   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
4027   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
4028   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
4029   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
4030   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
4031   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
4032   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
4033   case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
4034   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
4035                                                                Subtarget);
4036   case ISD::BITCAST:   return ExpandBITCAST(Op.getNode(), DAG);
4037   case ISD::SHL:
4038   case ISD::SRL:
4039   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
4040   case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
4041   case ISD::SRL_PARTS:
4042   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
4043   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
4044   case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
4045   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
4046   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
4047   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4048   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
4049   case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
4050   case ISD::MUL:           return LowerMUL(Op, DAG);
4051   }
4052   return SDValue();
4053 }
4054
4055 /// ReplaceNodeResults - Replace the results of node with an illegal result
4056 /// type with new values built out of custom code.
4057 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
4058                                            SmallVectorImpl<SDValue>&Results,
4059                                            SelectionDAG &DAG) const {
4060   SDValue Res;
4061   switch (N->getOpcode()) {
4062   default:
4063     llvm_unreachable("Don't know how to custom expand this!");
4064     break;
4065   case ISD::BITCAST:
4066     Res = ExpandBITCAST(N, DAG);
4067     break;
4068   case ISD::SRL:
4069   case ISD::SRA:
4070     Res = Expand64BitShift(N, DAG, Subtarget);
4071     break;
4072   }
4073   if (Res.getNode())
4074     Results.push_back(Res);
4075 }
4076
4077 //===----------------------------------------------------------------------===//
4078 //                           ARM Scheduler Hooks
4079 //===----------------------------------------------------------------------===//
4080
4081 MachineBasicBlock *
4082 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
4083                                      MachineBasicBlock *BB,
4084                                      unsigned Size) const {
4085   unsigned dest    = MI->getOperand(0).getReg();
4086   unsigned ptr     = MI->getOperand(1).getReg();
4087   unsigned oldval  = MI->getOperand(2).getReg();
4088   unsigned newval  = MI->getOperand(3).getReg();
4089   unsigned scratch = BB->getParent()->getRegInfo()
4090     .createVirtualRegister(ARM::GPRRegisterClass);
4091   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4092   DebugLoc dl = MI->getDebugLoc();
4093   bool isThumb2 = Subtarget->isThumb2();
4094
4095   unsigned ldrOpc, strOpc;
4096   switch (Size) {
4097   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
4098   case 1:
4099     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
4100     strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
4101     break;
4102   case 2:
4103     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
4104     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
4105     break;
4106   case 4:
4107     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
4108     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
4109     break;
4110   }
4111
4112   MachineFunction *MF = BB->getParent();
4113   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4114   MachineFunction::iterator It = BB;
4115   ++It; // insert the new blocks after the current block
4116
4117   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
4118   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
4119   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4120   MF->insert(It, loop1MBB);
4121   MF->insert(It, loop2MBB);
4122   MF->insert(It, exitMBB);
4123
4124   // Transfer the remainder of BB and its successor edges to exitMBB.
4125   exitMBB->splice(exitMBB->begin(), BB,
4126                   llvm::next(MachineBasicBlock::iterator(MI)),
4127                   BB->end());
4128   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4129
4130   //  thisMBB:
4131   //   ...
4132   //   fallthrough --> loop1MBB
4133   BB->addSuccessor(loop1MBB);
4134
4135   // loop1MBB:
4136   //   ldrex dest, [ptr]
4137   //   cmp dest, oldval
4138   //   bne exitMBB
4139   BB = loop1MBB;
4140   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
4141   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4142                  .addReg(dest).addReg(oldval));
4143   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4144     .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4145   BB->addSuccessor(loop2MBB);
4146   BB->addSuccessor(exitMBB);
4147
4148   // loop2MBB:
4149   //   strex scratch, newval, [ptr]
4150   //   cmp scratch, #0
4151   //   bne loop1MBB
4152   BB = loop2MBB;
4153   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
4154                  .addReg(ptr));
4155   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4156                  .addReg(scratch).addImm(0));
4157   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4158     .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4159   BB->addSuccessor(loop1MBB);
4160   BB->addSuccessor(exitMBB);
4161
4162   //  exitMBB:
4163   //   ...
4164   BB = exitMBB;
4165
4166   MI->eraseFromParent();   // The instruction is gone now.
4167
4168   return BB;
4169 }
4170
4171 MachineBasicBlock *
4172 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
4173                                     unsigned Size, unsigned BinOpcode) const {
4174   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
4175   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4176
4177   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4178   MachineFunction *MF = BB->getParent();
4179   MachineFunction::iterator It = BB;
4180   ++It;
4181
4182   unsigned dest = MI->getOperand(0).getReg();
4183   unsigned ptr = MI->getOperand(1).getReg();
4184   unsigned incr = MI->getOperand(2).getReg();
4185   DebugLoc dl = MI->getDebugLoc();
4186
4187   bool isThumb2 = Subtarget->isThumb2();
4188   unsigned ldrOpc, strOpc;
4189   switch (Size) {
4190   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
4191   case 1:
4192     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
4193     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
4194     break;
4195   case 2:
4196     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
4197     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
4198     break;
4199   case 4:
4200     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
4201     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
4202     break;
4203   }
4204
4205   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4206   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4207   MF->insert(It, loopMBB);
4208   MF->insert(It, exitMBB);
4209
4210   // Transfer the remainder of BB and its successor edges to exitMBB.
4211   exitMBB->splice(exitMBB->begin(), BB,
4212                   llvm::next(MachineBasicBlock::iterator(MI)),
4213                   BB->end());
4214   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4215
4216   MachineRegisterInfo &RegInfo = MF->getRegInfo();
4217   unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4218   unsigned scratch2 = (!BinOpcode) ? incr :
4219     RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4220
4221   //  thisMBB:
4222   //   ...
4223   //   fallthrough --> loopMBB
4224   BB->addSuccessor(loopMBB);
4225
4226   //  loopMBB:
4227   //   ldrex dest, ptr
4228   //   <binop> scratch2, dest, incr
4229   //   strex scratch, scratch2, ptr
4230   //   cmp scratch, #0
4231   //   bne- loopMBB
4232   //   fallthrough --> exitMBB
4233   BB = loopMBB;
4234   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
4235   if (BinOpcode) {
4236     // operand order needs to go the other way for NAND
4237     if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
4238       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4239                      addReg(incr).addReg(dest)).addReg(0);
4240     else
4241       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4242                      addReg(dest).addReg(incr)).addReg(0);
4243   }
4244
4245   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
4246                  .addReg(ptr));
4247   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4248                  .addReg(scratch).addImm(0));
4249   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4250     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4251
4252   BB->addSuccessor(loopMBB);
4253   BB->addSuccessor(exitMBB);
4254
4255   //  exitMBB:
4256   //   ...
4257   BB = exitMBB;
4258
4259   MI->eraseFromParent();   // The instruction is gone now.
4260
4261   return BB;
4262 }
4263
4264 static
4265 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
4266   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
4267        E = MBB->succ_end(); I != E; ++I)
4268     if (*I != Succ)
4269       return *I;
4270   llvm_unreachable("Expecting a BB with two successors!");
4271 }
4272
4273 MachineBasicBlock *
4274 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4275                                                MachineBasicBlock *BB) const {
4276   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4277   DebugLoc dl = MI->getDebugLoc();
4278   bool isThumb2 = Subtarget->isThumb2();
4279   switch (MI->getOpcode()) {
4280   default:
4281     MI->dump();
4282     llvm_unreachable("Unexpected instr type to insert");
4283
4284   case ARM::ATOMIC_LOAD_ADD_I8:
4285      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4286   case ARM::ATOMIC_LOAD_ADD_I16:
4287      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4288   case ARM::ATOMIC_LOAD_ADD_I32:
4289      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4290
4291   case ARM::ATOMIC_LOAD_AND_I8:
4292      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4293   case ARM::ATOMIC_LOAD_AND_I16:
4294      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4295   case ARM::ATOMIC_LOAD_AND_I32:
4296      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4297
4298   case ARM::ATOMIC_LOAD_OR_I8:
4299      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4300   case ARM::ATOMIC_LOAD_OR_I16:
4301      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4302   case ARM::ATOMIC_LOAD_OR_I32:
4303      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4304
4305   case ARM::ATOMIC_LOAD_XOR_I8:
4306      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4307   case ARM::ATOMIC_LOAD_XOR_I16:
4308      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4309   case ARM::ATOMIC_LOAD_XOR_I32:
4310      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4311
4312   case ARM::ATOMIC_LOAD_NAND_I8:
4313      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4314   case ARM::ATOMIC_LOAD_NAND_I16:
4315      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4316   case ARM::ATOMIC_LOAD_NAND_I32:
4317      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4318
4319   case ARM::ATOMIC_LOAD_SUB_I8:
4320      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4321   case ARM::ATOMIC_LOAD_SUB_I16:
4322      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4323   case ARM::ATOMIC_LOAD_SUB_I32:
4324      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4325
4326   case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
4327   case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
4328   case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
4329
4330   case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
4331   case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
4332   case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
4333
4334   case ARM::tMOVCCr_pseudo: {
4335     // To "insert" a SELECT_CC instruction, we actually have to insert the
4336     // diamond control-flow pattern.  The incoming instruction knows the
4337     // destination vreg to set, the condition code register to branch on, the
4338     // true/false values to select between, and a branch opcode to use.
4339     const BasicBlock *LLVM_BB = BB->getBasicBlock();
4340     MachineFunction::iterator It = BB;
4341     ++It;
4342
4343     //  thisMBB:
4344     //  ...
4345     //   TrueVal = ...
4346     //   cmpTY ccX, r1, r2
4347     //   bCC copy1MBB
4348     //   fallthrough --> copy0MBB
4349     MachineBasicBlock *thisMBB  = BB;
4350     MachineFunction *F = BB->getParent();
4351     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4352     MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
4353     F->insert(It, copy0MBB);
4354     F->insert(It, sinkMBB);
4355
4356     // Transfer the remainder of BB and its successor edges to sinkMBB.
4357     sinkMBB->splice(sinkMBB->begin(), BB,
4358                     llvm::next(MachineBasicBlock::iterator(MI)),
4359                     BB->end());
4360     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
4361
4362     BB->addSuccessor(copy0MBB);
4363     BB->addSuccessor(sinkMBB);
4364
4365     BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
4366       .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
4367
4368     //  copy0MBB:
4369     //   %FalseValue = ...
4370     //   # fallthrough to sinkMBB
4371     BB = copy0MBB;
4372
4373     // Update machine-CFG edges
4374     BB->addSuccessor(sinkMBB);
4375
4376     //  sinkMBB:
4377     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4378     //  ...
4379     BB = sinkMBB;
4380     BuildMI(*BB, BB->begin(), dl,
4381             TII->get(ARM::PHI), MI->getOperand(0).getReg())
4382       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4383       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4384
4385     MI->eraseFromParent();   // The pseudo instruction is gone now.
4386     return BB;
4387   }
4388
4389   case ARM::BCCi64:
4390   case ARM::BCCZi64: {
4391     // Compare both parts that make up the double comparison separately for
4392     // equality.
4393     bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
4394
4395     unsigned LHS1 = MI->getOperand(1).getReg();
4396     unsigned LHS2 = MI->getOperand(2).getReg();
4397     if (RHSisZero) {
4398       AddDefaultPred(BuildMI(BB, dl,
4399                              TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4400                      .addReg(LHS1).addImm(0));
4401       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4402         .addReg(LHS2).addImm(0)
4403         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4404     } else {
4405       unsigned RHS1 = MI->getOperand(3).getReg();
4406       unsigned RHS2 = MI->getOperand(4).getReg();
4407       AddDefaultPred(BuildMI(BB, dl,
4408                              TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4409                      .addReg(LHS1).addReg(RHS1));
4410       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4411         .addReg(LHS2).addReg(RHS2)
4412         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4413     }
4414
4415     MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
4416     MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
4417     if (MI->getOperand(0).getImm() == ARMCC::NE)
4418       std::swap(destMBB, exitMBB);
4419
4420     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4421       .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
4422     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
4423       .addMBB(exitMBB);
4424
4425     MI->eraseFromParent();   // The pseudo instruction is gone now.
4426     return BB;
4427   }
4428   }
4429 }
4430
4431 //===----------------------------------------------------------------------===//
4432 //                           ARM Optimization Hooks
4433 //===----------------------------------------------------------------------===//
4434
4435 static
4436 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
4437                             TargetLowering::DAGCombinerInfo &DCI) {
4438   SelectionDAG &DAG = DCI.DAG;
4439   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4440   EVT VT = N->getValueType(0);
4441   unsigned Opc = N->getOpcode();
4442   bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
4443   SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
4444   SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
4445   ISD::CondCode CC = ISD::SETCC_INVALID;
4446
4447   if (isSlctCC) {
4448     CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
4449   } else {
4450     SDValue CCOp = Slct.getOperand(0);
4451     if (CCOp.getOpcode() == ISD::SETCC)
4452       CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
4453   }
4454
4455   bool DoXform = false;
4456   bool InvCC = false;
4457   assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
4458           "Bad input!");
4459
4460   if (LHS.getOpcode() == ISD::Constant &&
4461       cast<ConstantSDNode>(LHS)->isNullValue()) {
4462     DoXform = true;
4463   } else if (CC != ISD::SETCC_INVALID &&
4464              RHS.getOpcode() == ISD::Constant &&
4465              cast<ConstantSDNode>(RHS)->isNullValue()) {
4466     std::swap(LHS, RHS);
4467     SDValue Op0 = Slct.getOperand(0);
4468     EVT OpVT = isSlctCC ? Op0.getValueType() :
4469                           Op0.getOperand(0).getValueType();
4470     bool isInt = OpVT.isInteger();
4471     CC = ISD::getSetCCInverse(CC, isInt);
4472
4473     if (!TLI.isCondCodeLegal(CC, OpVT))
4474       return SDValue();         // Inverse operator isn't legal.
4475
4476     DoXform = true;
4477     InvCC = true;
4478   }
4479
4480   if (DoXform) {
4481     SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
4482     if (isSlctCC)
4483       return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
4484                              Slct.getOperand(0), Slct.getOperand(1), CC);
4485     SDValue CCOp = Slct.getOperand(0);
4486     if (InvCC)
4487       CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
4488                           CCOp.getOperand(0), CCOp.getOperand(1), CC);
4489     return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
4490                        CCOp, OtherOp, Result);
4491   }
4492   return SDValue();
4493 }
4494
4495 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
4496 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
4497 /// called with the default operands, and if that fails, with commuted
4498 /// operands.
4499 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
4500                                          TargetLowering::DAGCombinerInfo &DCI) {
4501   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
4502   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
4503     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
4504     if (Result.getNode()) return Result;
4505   }
4506   return SDValue();
4507 }
4508
4509 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
4510 ///
4511 static SDValue PerformADDCombine(SDNode *N,
4512                                  TargetLowering::DAGCombinerInfo &DCI) {
4513   SDValue N0 = N->getOperand(0);
4514   SDValue N1 = N->getOperand(1);
4515
4516   // First try with the default operand order.
4517   SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
4518   if (Result.getNode())
4519     return Result;
4520
4521   // If that didn't work, try again with the operands commuted.
4522   return PerformADDCombineWithOperands(N, N1, N0, DCI);
4523 }
4524
4525 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
4526 ///
4527 static SDValue PerformSUBCombine(SDNode *N,
4528                                  TargetLowering::DAGCombinerInfo &DCI) {
4529   SDValue N0 = N->getOperand(0);
4530   SDValue N1 = N->getOperand(1);
4531
4532   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
4533   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4534     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4535     if (Result.getNode()) return Result;
4536   }
4537
4538   return SDValue();
4539 }
4540
4541 static SDValue PerformMULCombine(SDNode *N,
4542                                  TargetLowering::DAGCombinerInfo &DCI,
4543                                  const ARMSubtarget *Subtarget) {
4544   SelectionDAG &DAG = DCI.DAG;
4545
4546   if (Subtarget->isThumb1Only())
4547     return SDValue();
4548
4549   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
4550     return SDValue();
4551
4552   EVT VT = N->getValueType(0);
4553   if (VT != MVT::i32)
4554     return SDValue();
4555
4556   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
4557   if (!C)
4558     return SDValue();
4559
4560   uint64_t MulAmt = C->getZExtValue();
4561   unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
4562   ShiftAmt = ShiftAmt & (32 - 1);
4563   SDValue V = N->getOperand(0);
4564   DebugLoc DL = N->getDebugLoc();
4565
4566   SDValue Res;
4567   MulAmt >>= ShiftAmt;
4568   if (isPowerOf2_32(MulAmt - 1)) {
4569     // (mul x, 2^N + 1) => (add (shl x, N), x)
4570     Res = DAG.getNode(ISD::ADD, DL, VT,
4571                       V, DAG.getNode(ISD::SHL, DL, VT,
4572                                      V, DAG.getConstant(Log2_32(MulAmt-1),
4573                                                         MVT::i32)));
4574   } else if (isPowerOf2_32(MulAmt + 1)) {
4575     // (mul x, 2^N - 1) => (sub (shl x, N), x)
4576     Res = DAG.getNode(ISD::SUB, DL, VT,
4577                       DAG.getNode(ISD::SHL, DL, VT,
4578                                   V, DAG.getConstant(Log2_32(MulAmt+1),
4579                                                      MVT::i32)),
4580                                                      V);
4581   } else
4582     return SDValue();
4583
4584   if (ShiftAmt != 0)
4585     Res = DAG.getNode(ISD::SHL, DL, VT, Res,
4586                       DAG.getConstant(ShiftAmt, MVT::i32));
4587
4588   // Do not add new nodes to DAG combiner worklist.
4589   DCI.CombineTo(N, Res, false);
4590   return SDValue();
4591 }
4592
4593 static SDValue PerformANDCombine(SDNode *N,
4594                                 TargetLowering::DAGCombinerInfo &DCI) {
4595   // Attempt to use immediate-form VBIC
4596   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
4597   DebugLoc dl = N->getDebugLoc();
4598   EVT VT = N->getValueType(0);
4599   SelectionDAG &DAG = DCI.DAG;
4600
4601   APInt SplatBits, SplatUndef;
4602   unsigned SplatBitSize;
4603   bool HasAnyUndefs;
4604   if (BVN &&
4605       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
4606     if (SplatBitSize <= 64) {
4607       EVT VbicVT;
4608       SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
4609                                       SplatUndef.getZExtValue(), SplatBitSize,
4610                                       DAG, VbicVT, VT.is128BitVector(),
4611                                       OtherModImm);
4612       if (Val.getNode()) {
4613         SDValue Input =
4614           DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
4615         SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
4616         return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
4617       }
4618     }
4619   }
4620
4621   return SDValue();
4622 }
4623
4624 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
4625 static SDValue PerformORCombine(SDNode *N,
4626                                 TargetLowering::DAGCombinerInfo &DCI,
4627                                 const ARMSubtarget *Subtarget) {
4628   // Attempt to use immediate-form VORR
4629   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
4630   DebugLoc dl = N->getDebugLoc();
4631   EVT VT = N->getValueType(0);
4632   SelectionDAG &DAG = DCI.DAG;
4633
4634   APInt SplatBits, SplatUndef;
4635   unsigned SplatBitSize;
4636   bool HasAnyUndefs;
4637   if (BVN && Subtarget->hasNEON() &&
4638       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
4639     if (SplatBitSize <= 64) {
4640       EVT VorrVT;
4641       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
4642                                       SplatUndef.getZExtValue(), SplatBitSize,
4643                                       DAG, VorrVT, VT.is128BitVector(),
4644                                       OtherModImm);
4645       if (Val.getNode()) {
4646         SDValue Input =
4647           DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
4648         SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
4649         return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
4650       }
4651     }
4652   }
4653
4654   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
4655   // reasonable.
4656
4657   // BFI is only available on V6T2+
4658   if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
4659     return SDValue();
4660
4661   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4662   DebugLoc DL = N->getDebugLoc();
4663   // 1) or (and A, mask), val => ARMbfi A, val, mask
4664   //      iff (val & mask) == val
4665   //
4666   // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4667   //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
4668   //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4669   //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
4670   //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4671   //  (i.e., copy a bitfield value into another bitfield of the same width)
4672   if (N0.getOpcode() != ISD::AND)
4673     return SDValue();
4674
4675   if (VT != MVT::i32)
4676     return SDValue();
4677
4678
4679   // The value and the mask need to be constants so we can verify this is
4680   // actually a bitfield set. If the mask is 0xffff, we can do better
4681   // via a movt instruction, so don't use BFI in that case.
4682   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4683   if (!C)
4684     return SDValue();
4685   unsigned Mask = C->getZExtValue();
4686   if (Mask == 0xffff)
4687     return SDValue();
4688   SDValue Res;
4689   // Case (1): or (and A, mask), val => ARMbfi A, val, mask
4690   if ((C = dyn_cast<ConstantSDNode>(N1))) {
4691     unsigned Val = C->getZExtValue();
4692     if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
4693       return SDValue();
4694     Val >>= CountTrailingZeros_32(~Mask);
4695
4696     Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
4697                       DAG.getConstant(Val, MVT::i32),
4698                       DAG.getConstant(Mask, MVT::i32));
4699
4700     // Do not add new nodes to DAG combiner worklist.
4701     DCI.CombineTo(N, Res, false);
4702   } else if (N1.getOpcode() == ISD::AND) {
4703     // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4704     C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4705     if (!C)
4706       return SDValue();
4707     unsigned Mask2 = C->getZExtValue();
4708
4709     if (ARM::isBitFieldInvertedMask(Mask) &&
4710         ARM::isBitFieldInvertedMask(~Mask2) &&
4711         (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
4712       // The pack halfword instruction works better for masks that fit it,
4713       // so use that when it's available.
4714       if (Subtarget->hasT2ExtractPack() &&
4715           (Mask == 0xffff || Mask == 0xffff0000))
4716         return SDValue();
4717       // 2a
4718       unsigned lsb = CountTrailingZeros_32(Mask2);
4719       Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
4720                         DAG.getConstant(lsb, MVT::i32));
4721       Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
4722                         DAG.getConstant(Mask, MVT::i32));
4723       // Do not add new nodes to DAG combiner worklist.
4724       DCI.CombineTo(N, Res, false);
4725     } else if (ARM::isBitFieldInvertedMask(~Mask) &&
4726                ARM::isBitFieldInvertedMask(Mask2) &&
4727                (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
4728       // The pack halfword instruction works better for masks that fit it,
4729       // so use that when it's available.
4730       if (Subtarget->hasT2ExtractPack() &&
4731           (Mask2 == 0xffff || Mask2 == 0xffff0000))
4732         return SDValue();
4733       // 2b
4734       unsigned lsb = CountTrailingZeros_32(Mask);
4735       Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4736                         DAG.getConstant(lsb, MVT::i32));
4737       Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
4738                                 DAG.getConstant(Mask2, MVT::i32));
4739       // Do not add new nodes to DAG combiner worklist.
4740       DCI.CombineTo(N, Res, false);
4741     }
4742   }
4743
4744   return SDValue();
4745 }
4746
4747 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
4748 /// ARMISD::VMOVRRD.
4749 static SDValue PerformVMOVRRDCombine(SDNode *N,
4750                                      TargetLowering::DAGCombinerInfo &DCI) {
4751   // vmovrrd(vmovdrr x, y) -> x,y
4752   SDValue InDouble = N->getOperand(0);
4753   if (InDouble.getOpcode() == ARMISD::VMOVDRR)
4754     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
4755   return SDValue();
4756 }
4757
4758 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
4759 /// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
4760 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
4761   // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
4762   SDValue Op0 = N->getOperand(0);
4763   SDValue Op1 = N->getOperand(1);
4764   if (Op0.getOpcode() == ISD::BITCAST)
4765     Op0 = Op0.getOperand(0);
4766   if (Op1.getOpcode() == ISD::BITCAST)
4767     Op1 = Op1.getOperand(0);
4768   if (Op0.getOpcode() == ARMISD::VMOVRRD &&
4769       Op0.getNode() == Op1.getNode() &&
4770       Op0.getResNo() == 0 && Op1.getResNo() == 1)
4771     return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
4772                        N->getValueType(0), Op0.getOperand(0));
4773   return SDValue();
4774 }
4775
4776 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
4777 /// ISD::BUILD_VECTOR.
4778 static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) {
4779   // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
4780   // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
4781   // into a pair of GPRs, which is fine when the value is used as a scalar,
4782   // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
4783   if (N->getNumOperands() == 2)
4784     return PerformVMOVDRRCombine(N, DAG);
4785
4786   return SDValue();
4787 }
4788
4789 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
4790 /// ISD::VECTOR_SHUFFLE.
4791 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
4792   // The LLVM shufflevector instruction does not require the shuffle mask
4793   // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
4794   // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
4795   // operands do not match the mask length, they are extended by concatenating
4796   // them with undef vectors.  That is probably the right thing for other
4797   // targets, but for NEON it is better to concatenate two double-register
4798   // size vector operands into a single quad-register size vector.  Do that
4799   // transformation here:
4800   //   shuffle(concat(v1, undef), concat(v2, undef)) ->
4801   //   shuffle(concat(v1, v2), undef)
4802   SDValue Op0 = N->getOperand(0);
4803   SDValue Op1 = N->getOperand(1);
4804   if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
4805       Op1.getOpcode() != ISD::CONCAT_VECTORS ||
4806       Op0.getNumOperands() != 2 ||
4807       Op1.getNumOperands() != 2)
4808     return SDValue();
4809   SDValue Concat0Op1 = Op0.getOperand(1);
4810   SDValue Concat1Op1 = Op1.getOperand(1);
4811   if (Concat0Op1.getOpcode() != ISD::UNDEF ||
4812       Concat1Op1.getOpcode() != ISD::UNDEF)
4813     return SDValue();
4814   // Skip the transformation if any of the types are illegal.
4815   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4816   EVT VT = N->getValueType(0);
4817   if (!TLI.isTypeLegal(VT) ||
4818       !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
4819       !TLI.isTypeLegal(Concat1Op1.getValueType()))
4820     return SDValue();
4821
4822   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
4823                                   Op0.getOperand(0), Op1.getOperand(0));
4824   // Translate the shuffle mask.
4825   SmallVector<int, 16> NewMask;
4826   unsigned NumElts = VT.getVectorNumElements();
4827   unsigned HalfElts = NumElts/2;
4828   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4829   for (unsigned n = 0; n < NumElts; ++n) {
4830     int MaskElt = SVN->getMaskElt(n);
4831     int NewElt = -1;
4832     if (MaskElt < (int)HalfElts)
4833       NewElt = MaskElt;
4834     else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
4835       NewElt = HalfElts + MaskElt - NumElts;
4836     NewMask.push_back(NewElt);
4837   }
4838   return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
4839                               DAG.getUNDEF(VT), NewMask.data());
4840 }
4841
4842 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
4843 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
4844 /// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
4845 /// return true.
4846 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
4847   SelectionDAG &DAG = DCI.DAG;
4848   EVT VT = N->getValueType(0);
4849   // vldN-dup instructions only support 64-bit vectors for N > 1.
4850   if (!VT.is64BitVector())
4851     return false;
4852
4853   // Check if the VDUPLANE operand is a vldN-dup intrinsic.
4854   SDNode *VLD = N->getOperand(0).getNode();
4855   if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
4856     return false;
4857   unsigned NumVecs = 0;
4858   unsigned NewOpc = 0;
4859   unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
4860   if (IntNo == Intrinsic::arm_neon_vld2lane) {
4861     NumVecs = 2;
4862     NewOpc = ARMISD::VLD2DUP;
4863   } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
4864     NumVecs = 3;
4865     NewOpc = ARMISD::VLD3DUP;
4866   } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
4867     NumVecs = 4;
4868     NewOpc = ARMISD::VLD4DUP;
4869   } else {
4870     return false;
4871   }
4872
4873   // First check that all the vldN-lane uses are VDUPLANEs and that the lane
4874   // numbers match the load.
4875   unsigned VLDLaneNo =
4876     cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
4877   for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
4878        UI != UE; ++UI) {
4879     // Ignore uses of the chain result.
4880     if (UI.getUse().getResNo() == NumVecs)
4881       continue;
4882     SDNode *User = *UI;
4883     if (User->getOpcode() != ARMISD::VDUPLANE ||
4884         VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
4885       return false;
4886   }
4887
4888   // Create the vldN-dup node.
4889   EVT Tys[5];
4890   unsigned n;
4891   for (n = 0; n < NumVecs; ++n)
4892     Tys[n] = VT;
4893   Tys[n] = MVT::Other;
4894   SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
4895   SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
4896   MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
4897   SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
4898                                            Ops, 2, VLDMemInt->getMemoryVT(),
4899                                            VLDMemInt->getMemOperand());
4900
4901   // Update the uses.
4902   for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
4903        UI != UE; ++UI) {
4904     unsigned ResNo = UI.getUse().getResNo();
4905     // Ignore uses of the chain result.
4906     if (ResNo == NumVecs)
4907       continue;
4908     SDNode *User = *UI;
4909     DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
4910   }
4911
4912   // Now the vldN-lane intrinsic is dead except for its chain result.
4913   // Update uses of the chain.
4914   std::vector<SDValue> VLDDupResults;
4915   for (unsigned n = 0; n < NumVecs; ++n)
4916     VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
4917   VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
4918   DCI.CombineTo(VLD, VLDDupResults);
4919
4920   return true;
4921 }
4922
4923 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
4924 /// ARMISD::VDUPLANE.
4925 static SDValue PerformVDUPLANECombine(SDNode *N,
4926                                       TargetLowering::DAGCombinerInfo &DCI) {
4927   SDValue Op = N->getOperand(0);
4928
4929   // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
4930   // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
4931   if (CombineVLDDUP(N, DCI))
4932     return SDValue(N, 0);
4933
4934   // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
4935   // redundant.  Ignore bit_converts for now; element sizes are checked below.
4936   while (Op.getOpcode() == ISD::BITCAST)
4937     Op = Op.getOperand(0);
4938   if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
4939     return SDValue();
4940
4941   // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
4942   unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
4943   // The canonical VMOV for a zero vector uses a 32-bit element size.
4944   unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4945   unsigned EltBits;
4946   if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
4947     EltSize = 8;
4948   EVT VT = N->getValueType(0);
4949   if (EltSize > VT.getVectorElementType().getSizeInBits())
4950     return SDValue();
4951
4952   return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
4953 }
4954
4955 /// getVShiftImm - Check if this is a valid build_vector for the immediate
4956 /// operand of a vector shift operation, where all the elements of the
4957 /// build_vector must have the same constant integer value.
4958 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
4959   // Ignore bit_converts.
4960   while (Op.getOpcode() == ISD::BITCAST)
4961     Op = Op.getOperand(0);
4962   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4963   APInt SplatBits, SplatUndef;
4964   unsigned SplatBitSize;
4965   bool HasAnyUndefs;
4966   if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
4967                                       HasAnyUndefs, ElementBits) ||
4968       SplatBitSize > ElementBits)
4969     return false;
4970   Cnt = SplatBits.getSExtValue();
4971   return true;
4972 }
4973
4974 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
4975 /// operand of a vector shift left operation.  That value must be in the range:
4976 ///   0 <= Value < ElementBits for a left shift; or
4977 ///   0 <= Value <= ElementBits for a long left shift.
4978 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
4979   assert(VT.isVector() && "vector shift count is not a vector type");
4980   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4981   if (! getVShiftImm(Op, ElementBits, Cnt))
4982     return false;
4983   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
4984 }
4985
4986 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
4987 /// operand of a vector shift right operation.  For a shift opcode, the value
4988 /// is positive, but for an intrinsic the value count must be negative. The
4989 /// absolute value must be in the range:
4990 ///   1 <= |Value| <= ElementBits for a right shift; or
4991 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
4992 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4993                          int64_t &Cnt) {
4994   assert(VT.isVector() && "vector shift count is not a vector type");
4995   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4996   if (! getVShiftImm(Op, ElementBits, Cnt))
4997     return false;
4998   if (isIntrinsic)
4999     Cnt = -Cnt;
5000   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
5001 }
5002
5003 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
5004 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
5005   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
5006   switch (IntNo) {
5007   default:
5008     // Don't do anything for most intrinsics.
5009     break;
5010
5011   // Vector shifts: check for immediate versions and lower them.
5012   // Note: This is done during DAG combining instead of DAG legalizing because
5013   // the build_vectors for 64-bit vector element shift counts are generally
5014   // not legal, and it is hard to see their values after they get legalized to
5015   // loads from a constant pool.
5016   case Intrinsic::arm_neon_vshifts:
5017   case Intrinsic::arm_neon_vshiftu:
5018   case Intrinsic::arm_neon_vshiftls:
5019   case Intrinsic::arm_neon_vshiftlu:
5020   case Intrinsic::arm_neon_vshiftn:
5021   case Intrinsic::arm_neon_vrshifts:
5022   case Intrinsic::arm_neon_vrshiftu:
5023   case Intrinsic::arm_neon_vrshiftn:
5024   case Intrinsic::arm_neon_vqshifts:
5025   case Intrinsic::arm_neon_vqshiftu:
5026   case Intrinsic::arm_neon_vqshiftsu:
5027   case Intrinsic::arm_neon_vqshiftns:
5028   case Intrinsic::arm_neon_vqshiftnu:
5029   case Intrinsic::arm_neon_vqshiftnsu:
5030   case Intrinsic::arm_neon_vqrshiftns:
5031   case Intrinsic::arm_neon_vqrshiftnu:
5032   case Intrinsic::arm_neon_vqrshiftnsu: {
5033     EVT VT = N->getOperand(1).getValueType();
5034     int64_t Cnt;
5035     unsigned VShiftOpc = 0;
5036
5037     switch (IntNo) {
5038     case Intrinsic::arm_neon_vshifts:
5039     case Intrinsic::arm_neon_vshiftu:
5040       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
5041         VShiftOpc = ARMISD::VSHL;
5042         break;
5043       }
5044       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
5045         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
5046                      ARMISD::VSHRs : ARMISD::VSHRu);
5047         break;
5048       }
5049       return SDValue();
5050
5051     case Intrinsic::arm_neon_vshiftls:
5052     case Intrinsic::arm_neon_vshiftlu:
5053       if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
5054         break;
5055       llvm_unreachable("invalid shift count for vshll intrinsic");
5056
5057     case Intrinsic::arm_neon_vrshifts:
5058     case Intrinsic::arm_neon_vrshiftu:
5059       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
5060         break;
5061       return SDValue();
5062
5063     case Intrinsic::arm_neon_vqshifts:
5064     case Intrinsic::arm_neon_vqshiftu:
5065       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
5066         break;
5067       return SDValue();
5068
5069     case Intrinsic::arm_neon_vqshiftsu:
5070       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
5071         break;
5072       llvm_unreachable("invalid shift count for vqshlu intrinsic");
5073
5074     case Intrinsic::arm_neon_vshiftn:
5075     case Intrinsic::arm_neon_vrshiftn:
5076     case Intrinsic::arm_neon_vqshiftns:
5077     case Intrinsic::arm_neon_vqshiftnu:
5078     case Intrinsic::arm_neon_vqshiftnsu:
5079     case Intrinsic::arm_neon_vqrshiftns:
5080     case Intrinsic::arm_neon_vqrshiftnu:
5081     case Intrinsic::arm_neon_vqrshiftnsu:
5082       // Narrowing shifts require an immediate right shift.
5083       if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
5084         break;
5085       llvm_unreachable("invalid shift count for narrowing vector shift "
5086                        "intrinsic");
5087
5088     default:
5089       llvm_unreachable("unhandled vector shift");
5090     }
5091
5092     switch (IntNo) {
5093     case Intrinsic::arm_neon_vshifts:
5094     case Intrinsic::arm_neon_vshiftu:
5095       // Opcode already set above.
5096       break;
5097     case Intrinsic::arm_neon_vshiftls:
5098     case Intrinsic::arm_neon_vshiftlu:
5099       if (Cnt == VT.getVectorElementType().getSizeInBits())
5100         VShiftOpc = ARMISD::VSHLLi;
5101       else
5102         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
5103                      ARMISD::VSHLLs : ARMISD::VSHLLu);
5104       break;
5105     case Intrinsic::arm_neon_vshiftn:
5106       VShiftOpc = ARMISD::VSHRN; break;
5107     case Intrinsic::arm_neon_vrshifts:
5108       VShiftOpc = ARMISD::VRSHRs; break;
5109     case Intrinsic::arm_neon_vrshiftu:
5110       VShiftOpc = ARMISD::VRSHRu; break;
5111     case Intrinsic::arm_neon_vrshiftn:
5112       VShiftOpc = ARMISD::VRSHRN; break;
5113     case Intrinsic::arm_neon_vqshifts:
5114       VShiftOpc = ARMISD::VQSHLs; break;
5115     case Intrinsic::arm_neon_vqshiftu:
5116       VShiftOpc = ARMISD::VQSHLu; break;
5117     case Intrinsic::arm_neon_vqshiftsu:
5118       VShiftOpc = ARMISD::VQSHLsu; break;
5119     case Intrinsic::arm_neon_vqshiftns:
5120       VShiftOpc = ARMISD::VQSHRNs; break;
5121     case Intrinsic::arm_neon_vqshiftnu:
5122       VShiftOpc = ARMISD::VQSHRNu; break;
5123     case Intrinsic::arm_neon_vqshiftnsu:
5124       VShiftOpc = ARMISD::VQSHRNsu; break;
5125     case Intrinsic::arm_neon_vqrshiftns:
5126       VShiftOpc = ARMISD::VQRSHRNs; break;
5127     case Intrinsic::arm_neon_vqrshiftnu:
5128       VShiftOpc = ARMISD::VQRSHRNu; break;
5129     case Intrinsic::arm_neon_vqrshiftnsu:
5130       VShiftOpc = ARMISD::VQRSHRNsu; break;
5131     }
5132
5133     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
5134                        N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
5135   }
5136
5137   case Intrinsic::arm_neon_vshiftins: {
5138     EVT VT = N->getOperand(1).getValueType();
5139     int64_t Cnt;
5140     unsigned VShiftOpc = 0;
5141
5142     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
5143       VShiftOpc = ARMISD::VSLI;
5144     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
5145       VShiftOpc = ARMISD::VSRI;
5146     else {
5147       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
5148     }
5149
5150     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
5151                        N->getOperand(1), N->getOperand(2),
5152                        DAG.getConstant(Cnt, MVT::i32));
5153   }
5154
5155   case Intrinsic::arm_neon_vqrshifts:
5156   case Intrinsic::arm_neon_vqrshiftu:
5157     // No immediate versions of these to check for.
5158     break;
5159   }
5160
5161   return SDValue();
5162 }
5163
5164 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
5165 /// lowers them.  As with the vector shift intrinsics, this is done during DAG
5166 /// combining instead of DAG legalizing because the build_vectors for 64-bit
5167 /// vector element shift counts are generally not legal, and it is hard to see
5168 /// their values after they get legalized to loads from a constant pool.
5169 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
5170                                    const ARMSubtarget *ST) {
5171   EVT VT = N->getValueType(0);
5172
5173   // Nothing to be done for scalar shifts.
5174   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5175   if (!VT.isVector() || !TLI.isTypeLegal(VT))
5176     return SDValue();
5177
5178   assert(ST->hasNEON() && "unexpected vector shift");
5179   int64_t Cnt;
5180
5181   switch (N->getOpcode()) {
5182   default: llvm_unreachable("unexpected shift opcode");
5183
5184   case ISD::SHL:
5185     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
5186       return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
5187                          DAG.getConstant(Cnt, MVT::i32));
5188     break;
5189
5190   case ISD::SRA:
5191   case ISD::SRL:
5192     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
5193       unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
5194                             ARMISD::VSHRs : ARMISD::VSHRu);
5195       return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
5196                          DAG.getConstant(Cnt, MVT::i32));
5197     }
5198   }
5199   return SDValue();
5200 }
5201
5202 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
5203 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
5204 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
5205                                     const ARMSubtarget *ST) {
5206   SDValue N0 = N->getOperand(0);
5207
5208   // Check for sign- and zero-extensions of vector extract operations of 8-
5209   // and 16-bit vector elements.  NEON supports these directly.  They are
5210   // handled during DAG combining because type legalization will promote them
5211   // to 32-bit types and it is messy to recognize the operations after that.
5212   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
5213     SDValue Vec = N0.getOperand(0);
5214     SDValue Lane = N0.getOperand(1);
5215     EVT VT = N->getValueType(0);
5216     EVT EltVT = N0.getValueType();
5217     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5218
5219     if (VT == MVT::i32 &&
5220         (EltVT == MVT::i8 || EltVT == MVT::i16) &&
5221         TLI.isTypeLegal(Vec.getValueType()) &&
5222         isa<ConstantSDNode>(Lane)) {
5223
5224       unsigned Opc = 0;
5225       switch (N->getOpcode()) {
5226       default: llvm_unreachable("unexpected opcode");
5227       case ISD::SIGN_EXTEND:
5228         Opc = ARMISD::VGETLANEs;
5229         break;
5230       case ISD::ZERO_EXTEND:
5231       case ISD::ANY_EXTEND:
5232         Opc = ARMISD::VGETLANEu;
5233         break;
5234       }
5235       return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
5236     }
5237   }
5238
5239   return SDValue();
5240 }
5241
5242 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
5243 /// to match f32 max/min patterns to use NEON vmax/vmin instructions.
5244 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5245                                        const ARMSubtarget *ST) {
5246   // If the target supports NEON, try to use vmax/vmin instructions for f32
5247   // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
5248   // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
5249   // a NaN; only do the transformation when it matches that behavior.
5250
5251   // For now only do this when using NEON for FP operations; if using VFP, it
5252   // is not obvious that the benefit outweighs the cost of switching to the
5253   // NEON pipeline.
5254   if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
5255       N->getValueType(0) != MVT::f32)
5256     return SDValue();
5257
5258   SDValue CondLHS = N->getOperand(0);
5259   SDValue CondRHS = N->getOperand(1);
5260   SDValue LHS = N->getOperand(2);
5261   SDValue RHS = N->getOperand(3);
5262   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5263
5264   unsigned Opcode = 0;
5265   bool IsReversed;
5266   if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
5267     IsReversed = false; // x CC y ? x : y
5268   } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
5269     IsReversed = true ; // x CC y ? y : x
5270   } else {
5271     return SDValue();
5272   }
5273
5274   bool IsUnordered;
5275   switch (CC) {
5276   default: break;
5277   case ISD::SETOLT:
5278   case ISD::SETOLE:
5279   case ISD::SETLT:
5280   case ISD::SETLE:
5281   case ISD::SETULT:
5282   case ISD::SETULE:
5283     // If LHS is NaN, an ordered comparison will be false and the result will
5284     // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
5285     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
5286     IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
5287     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
5288       break;
5289     // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
5290     // will return -0, so vmin can only be used for unsafe math or if one of
5291     // the operands is known to be nonzero.
5292     if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
5293         !UnsafeFPMath &&
5294         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
5295       break;
5296     Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
5297     break;
5298
5299   case ISD::SETOGT:
5300   case ISD::SETOGE:
5301   case ISD::SETGT:
5302   case ISD::SETGE:
5303   case ISD::SETUGT:
5304   case ISD::SETUGE:
5305     // If LHS is NaN, an ordered comparison will be false and the result will
5306     // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
5307     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
5308     IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
5309     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
5310       break;
5311     // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
5312     // will return +0, so vmax can only be used for unsafe math or if one of
5313     // the operands is known to be nonzero.
5314     if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
5315         !UnsafeFPMath &&
5316         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
5317       break;
5318     Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
5319     break;
5320   }
5321
5322   if (!Opcode)
5323     return SDValue();
5324   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
5325 }
5326
5327 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
5328                                              DAGCombinerInfo &DCI) const {
5329   switch (N->getOpcode()) {
5330   default: break;
5331   case ISD::ADD:        return PerformADDCombine(N, DCI);
5332   case ISD::SUB:        return PerformSUBCombine(N, DCI);
5333   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
5334   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
5335   case ISD::AND:        return PerformANDCombine(N, DCI);
5336   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
5337   case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
5338   case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
5339   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
5340   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
5341   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
5342   case ISD::SHL:
5343   case ISD::SRA:
5344   case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
5345   case ISD::SIGN_EXTEND:
5346   case ISD::ZERO_EXTEND:
5347   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
5348   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
5349   }
5350   return SDValue();
5351 }
5352
5353 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
5354   if (!Subtarget->allowsUnalignedMem())
5355     return false;
5356
5357   switch (VT.getSimpleVT().SimpleTy) {
5358   default:
5359     return false;
5360   case MVT::i8:
5361   case MVT::i16:
5362   case MVT::i32:
5363     return true;
5364   // FIXME: VLD1 etc with standard alignment is legal.
5365   }
5366 }
5367
5368 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
5369   if (V < 0)
5370     return false;
5371
5372   unsigned Scale = 1;
5373   switch (VT.getSimpleVT().SimpleTy) {
5374   default: return false;
5375   case MVT::i1:
5376   case MVT::i8:
5377     // Scale == 1;
5378     break;
5379   case MVT::i16:
5380     // Scale == 2;
5381     Scale = 2;
5382     break;
5383   case MVT::i32:
5384     // Scale == 4;
5385     Scale = 4;
5386     break;
5387   }
5388
5389   if ((V & (Scale - 1)) != 0)
5390     return false;
5391   V /= Scale;
5392   return V == (V & ((1LL << 5) - 1));
5393 }
5394
5395 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
5396                                       const ARMSubtarget *Subtarget) {
5397   bool isNeg = false;
5398   if (V < 0) {
5399     isNeg = true;
5400     V = - V;
5401   }
5402
5403   switch (VT.getSimpleVT().SimpleTy) {
5404   default: return false;
5405   case MVT::i1:
5406   case MVT::i8:
5407   case MVT::i16:
5408   case MVT::i32:
5409     // + imm12 or - imm8
5410     if (isNeg)
5411       return V == (V & ((1LL << 8) - 1));
5412     return V == (V & ((1LL << 12) - 1));
5413   case MVT::f32:
5414   case MVT::f64:
5415     // Same as ARM mode. FIXME: NEON?
5416     if (!Subtarget->hasVFP2())
5417       return false;
5418     if ((V & 3) != 0)
5419       return false;
5420     V >>= 2;
5421     return V == (V & ((1LL << 8) - 1));
5422   }
5423 }
5424
5425 /// isLegalAddressImmediate - Return true if the integer value can be used
5426 /// as the offset of the target addressing mode for load / store of the
5427 /// given type.
5428 static bool isLegalAddressImmediate(int64_t V, EVT VT,
5429                                     const ARMSubtarget *Subtarget) {
5430   if (V == 0)
5431     return true;
5432
5433   if (!VT.isSimple())
5434     return false;
5435
5436   if (Subtarget->isThumb1Only())
5437     return isLegalT1AddressImmediate(V, VT);
5438   else if (Subtarget->isThumb2())
5439     return isLegalT2AddressImmediate(V, VT, Subtarget);
5440
5441   // ARM mode.
5442   if (V < 0)
5443     V = - V;
5444   switch (VT.getSimpleVT().SimpleTy) {
5445   default: return false;
5446   case MVT::i1:
5447   case MVT::i8:
5448   case MVT::i32:
5449     // +- imm12
5450     return V == (V & ((1LL << 12) - 1));
5451   case MVT::i16:
5452     // +- imm8
5453     return V == (V & ((1LL << 8) - 1));
5454   case MVT::f32:
5455   case MVT::f64:
5456     if (!Subtarget->hasVFP2()) // FIXME: NEON?
5457       return false;
5458     if ((V & 3) != 0)
5459       return false;
5460     V >>= 2;
5461     return V == (V & ((1LL << 8) - 1));
5462   }
5463 }
5464
5465 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
5466                                                       EVT VT) const {
5467   int Scale = AM.Scale;
5468   if (Scale < 0)
5469     return false;
5470
5471   switch (VT.getSimpleVT().SimpleTy) {
5472   default: return false;
5473   case MVT::i1:
5474   case MVT::i8:
5475   case MVT::i16:
5476   case MVT::i32:
5477     if (Scale == 1)
5478       return true;
5479     // r + r << imm
5480     Scale = Scale & ~1;
5481     return Scale == 2 || Scale == 4 || Scale == 8;
5482   case MVT::i64:
5483     // r + r
5484     if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5485       return true;
5486     return false;
5487   case MVT::isVoid:
5488     // Note, we allow "void" uses (basically, uses that aren't loads or
5489     // stores), because arm allows folding a scale into many arithmetic
5490     // operations.  This should be made more precise and revisited later.
5491
5492     // Allow r << imm, but the imm has to be a multiple of two.
5493     if (Scale & 1) return false;
5494     return isPowerOf2_32(Scale);
5495   }
5496 }
5497
5498 /// isLegalAddressingMode - Return true if the addressing mode represented
5499 /// by AM is legal for this target, for a load/store of the specified type.
5500 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
5501                                               const Type *Ty) const {
5502   EVT VT = getValueType(Ty, true);
5503   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
5504     return false;
5505
5506   // Can never fold addr of global into load/store.
5507   if (AM.BaseGV)
5508     return false;
5509
5510   switch (AM.Scale) {
5511   case 0:  // no scale reg, must be "r+i" or "r", or "i".
5512     break;
5513   case 1:
5514     if (Subtarget->isThumb1Only())
5515       return false;
5516     // FALL THROUGH.
5517   default:
5518     // ARM doesn't support any R+R*scale+imm addr modes.
5519     if (AM.BaseOffs)
5520       return false;
5521
5522     if (!VT.isSimple())
5523       return false;
5524
5525     if (Subtarget->isThumb2())
5526       return isLegalT2ScaledAddressingMode(AM, VT);
5527
5528     int Scale = AM.Scale;
5529     switch (VT.getSimpleVT().SimpleTy) {
5530     default: return false;
5531     case MVT::i1:
5532     case MVT::i8:
5533     case MVT::i32:
5534       if (Scale < 0) Scale = -Scale;
5535       if (Scale == 1)
5536         return true;
5537       // r + r << imm
5538       return isPowerOf2_32(Scale & ~1);
5539     case MVT::i16:
5540     case MVT::i64:
5541       // r + r
5542       if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5543         return true;
5544       return false;
5545
5546     case MVT::isVoid:
5547       // Note, we allow "void" uses (basically, uses that aren't loads or
5548       // stores), because arm allows folding a scale into many arithmetic
5549       // operations.  This should be made more precise and revisited later.
5550
5551       // Allow r << imm, but the imm has to be a multiple of two.
5552       if (Scale & 1) return false;
5553       return isPowerOf2_32(Scale);
5554     }
5555     break;
5556   }
5557   return true;
5558 }
5559
5560 /// isLegalICmpImmediate - Return true if the specified immediate is legal
5561 /// icmp immediate, that is the target has icmp instructions which can compare
5562 /// a register against the immediate without having to materialize the
5563 /// immediate into a register.
5564 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
5565   if (!Subtarget->isThumb())
5566     return ARM_AM::getSOImmVal(Imm) != -1;
5567   if (Subtarget->isThumb2())
5568     return ARM_AM::getT2SOImmVal(Imm) != -1;
5569   return Imm >= 0 && Imm <= 255;
5570 }
5571
5572 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
5573                                       bool isSEXTLoad, SDValue &Base,
5574                                       SDValue &Offset, bool &isInc,
5575                                       SelectionDAG &DAG) {
5576   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5577     return false;
5578
5579   if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
5580     // AddressingMode 3
5581     Base = Ptr->getOperand(0);
5582     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5583       int RHSC = (int)RHS->getZExtValue();
5584       if (RHSC < 0 && RHSC > -256) {
5585         assert(Ptr->getOpcode() == ISD::ADD);
5586         isInc = false;
5587         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5588         return true;
5589       }
5590     }
5591     isInc = (Ptr->getOpcode() == ISD::ADD);
5592     Offset = Ptr->getOperand(1);
5593     return true;
5594   } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
5595     // AddressingMode 2
5596     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5597       int RHSC = (int)RHS->getZExtValue();
5598       if (RHSC < 0 && RHSC > -0x1000) {
5599         assert(Ptr->getOpcode() == ISD::ADD);
5600         isInc = false;
5601         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5602         Base = Ptr->getOperand(0);
5603         return true;
5604       }
5605     }
5606
5607     if (Ptr->getOpcode() == ISD::ADD) {
5608       isInc = true;
5609       ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
5610       if (ShOpcVal != ARM_AM::no_shift) {
5611         Base = Ptr->getOperand(1);
5612         Offset = Ptr->getOperand(0);
5613       } else {
5614         Base = Ptr->getOperand(0);
5615         Offset = Ptr->getOperand(1);
5616       }
5617       return true;
5618     }
5619
5620     isInc = (Ptr->getOpcode() == ISD::ADD);
5621     Base = Ptr->getOperand(0);
5622     Offset = Ptr->getOperand(1);
5623     return true;
5624   }
5625
5626   // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
5627   return false;
5628 }
5629
5630 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
5631                                      bool isSEXTLoad, SDValue &Base,
5632                                      SDValue &Offset, bool &isInc,
5633                                      SelectionDAG &DAG) {
5634   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5635     return false;
5636
5637   Base = Ptr->getOperand(0);
5638   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5639     int RHSC = (int)RHS->getZExtValue();
5640     if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
5641       assert(Ptr->getOpcode() == ISD::ADD);
5642       isInc = false;
5643       Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5644       return true;
5645     } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
5646       isInc = Ptr->getOpcode() == ISD::ADD;
5647       Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
5648       return true;
5649     }
5650   }
5651
5652   return false;
5653 }
5654
5655 /// getPreIndexedAddressParts - returns true by value, base pointer and
5656 /// offset pointer and addressing mode by reference if the node's address
5657 /// can be legally represented as pre-indexed load / store address.
5658 bool
5659 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
5660                                              SDValue &Offset,
5661                                              ISD::MemIndexedMode &AM,
5662                                              SelectionDAG &DAG) const {
5663   if (Subtarget->isThumb1Only())
5664     return false;
5665
5666   EVT VT;
5667   SDValue Ptr;
5668   bool isSEXTLoad = false;
5669   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5670     Ptr = LD->getBasePtr();
5671     VT  = LD->getMemoryVT();
5672     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5673   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5674     Ptr = ST->getBasePtr();
5675     VT  = ST->getMemoryVT();
5676   } else
5677     return false;
5678
5679   bool isInc;
5680   bool isLegal = false;
5681   if (Subtarget->isThumb2())
5682     isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5683                                        Offset, isInc, DAG);
5684   else
5685     isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5686                                         Offset, isInc, DAG);
5687   if (!isLegal)
5688     return false;
5689
5690   AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
5691   return true;
5692 }
5693
5694 /// getPostIndexedAddressParts - returns true by value, base pointer and
5695 /// offset pointer and addressing mode by reference if this node can be
5696 /// combined with a load / store to form a post-indexed load / store.
5697 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
5698                                                    SDValue &Base,
5699                                                    SDValue &Offset,
5700                                                    ISD::MemIndexedMode &AM,
5701                                                    SelectionDAG &DAG) const {
5702   if (Subtarget->isThumb1Only())
5703     return false;
5704
5705   EVT VT;
5706   SDValue Ptr;
5707   bool isSEXTLoad = false;
5708   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5709     VT  = LD->getMemoryVT();
5710     Ptr = LD->getBasePtr();
5711     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5712   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5713     VT  = ST->getMemoryVT();
5714     Ptr = ST->getBasePtr();
5715   } else
5716     return false;
5717
5718   bool isInc;
5719   bool isLegal = false;
5720   if (Subtarget->isThumb2())
5721     isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5722                                        isInc, DAG);
5723   else
5724     isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5725                                         isInc, DAG);
5726   if (!isLegal)
5727     return false;
5728
5729   if (Ptr != Base) {
5730     // Swap base ptr and offset to catch more post-index load / store when
5731     // it's legal. In Thumb2 mode, offset must be an immediate.
5732     if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
5733         !Subtarget->isThumb2())
5734       std::swap(Base, Offset);
5735
5736     // Post-indexed load / store update the base pointer.
5737     if (Ptr != Base)
5738       return false;
5739   }
5740
5741   AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
5742   return true;
5743 }
5744
5745 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
5746                                                        const APInt &Mask,
5747                                                        APInt &KnownZero,
5748                                                        APInt &KnownOne,
5749                                                        const SelectionDAG &DAG,
5750                                                        unsigned Depth) const {
5751   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
5752   switch (Op.getOpcode()) {
5753   default: break;
5754   case ARMISD::CMOV: {
5755     // Bits are known zero/one if known on the LHS and RHS.
5756     DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
5757     if (KnownZero == 0 && KnownOne == 0) return;
5758
5759     APInt KnownZeroRHS, KnownOneRHS;
5760     DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
5761                           KnownZeroRHS, KnownOneRHS, Depth+1);
5762     KnownZero &= KnownZeroRHS;
5763     KnownOne  &= KnownOneRHS;
5764     return;
5765   }
5766   }
5767 }
5768
5769 //===----------------------------------------------------------------------===//
5770 //                           ARM Inline Assembly Support
5771 //===----------------------------------------------------------------------===//
5772
5773 /// getConstraintType - Given a constraint letter, return the type of
5774 /// constraint it is for this target.
5775 ARMTargetLowering::ConstraintType
5776 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
5777   if (Constraint.size() == 1) {
5778     switch (Constraint[0]) {
5779     default:  break;
5780     case 'l': return C_RegisterClass;
5781     case 'w': return C_RegisterClass;
5782     }
5783   }
5784   return TargetLowering::getConstraintType(Constraint);
5785 }
5786
5787 /// Examine constraint type and operand type and determine a weight value.
5788 /// This object must already have been set up with the operand type
5789 /// and the current alternative constraint selected.
5790 TargetLowering::ConstraintWeight
5791 ARMTargetLowering::getSingleConstraintMatchWeight(
5792     AsmOperandInfo &info, const char *constraint) const {
5793   ConstraintWeight weight = CW_Invalid;
5794   Value *CallOperandVal = info.CallOperandVal;
5795     // If we don't have a value, we can't do a match,
5796     // but allow it at the lowest weight.
5797   if (CallOperandVal == NULL)
5798     return CW_Default;
5799   const Type *type = CallOperandVal->getType();
5800   // Look at the constraint type.
5801   switch (*constraint) {
5802   default:
5803     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5804     break;
5805   case 'l':
5806     if (type->isIntegerTy()) {
5807       if (Subtarget->isThumb())
5808         weight = CW_SpecificReg;
5809       else
5810         weight = CW_Register;
5811     }
5812     break;
5813   case 'w':
5814     if (type->isFloatingPointTy())
5815       weight = CW_Register;
5816     break;
5817   }
5818   return weight;
5819 }
5820
5821 std::pair<unsigned, const TargetRegisterClass*>
5822 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5823                                                 EVT VT) const {
5824   if (Constraint.size() == 1) {
5825     // GCC ARM Constraint Letters
5826     switch (Constraint[0]) {
5827     case 'l':
5828       if (Subtarget->isThumb())
5829         return std::make_pair(0U, ARM::tGPRRegisterClass);
5830       else
5831         return std::make_pair(0U, ARM::GPRRegisterClass);
5832     case 'r':
5833       return std::make_pair(0U, ARM::GPRRegisterClass);
5834     case 'w':
5835       if (VT == MVT::f32)
5836         return std::make_pair(0U, ARM::SPRRegisterClass);
5837       if (VT.getSizeInBits() == 64)
5838         return std::make_pair(0U, ARM::DPRRegisterClass);
5839       if (VT.getSizeInBits() == 128)
5840         return std::make_pair(0U, ARM::QPRRegisterClass);
5841       break;
5842     }
5843   }
5844   if (StringRef("{cc}").equals_lower(Constraint))
5845     return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
5846
5847   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5848 }
5849
5850 std::vector<unsigned> ARMTargetLowering::
5851 getRegClassForInlineAsmConstraint(const std::string &Constraint,
5852                                   EVT VT) const {
5853   if (Constraint.size() != 1)
5854     return std::vector<unsigned>();
5855
5856   switch (Constraint[0]) {      // GCC ARM Constraint Letters
5857   default: break;
5858   case 'l':
5859     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5860                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5861                                  0);
5862   case 'r':
5863     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5864                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5865                                  ARM::R8, ARM::R9, ARM::R10, ARM::R11,
5866                                  ARM::R12, ARM::LR, 0);
5867   case 'w':
5868     if (VT == MVT::f32)
5869       return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
5870                                    ARM::S4, ARM::S5, ARM::S6, ARM::S7,
5871                                    ARM::S8, ARM::S9, ARM::S10, ARM::S11,
5872                                    ARM::S12,ARM::S13,ARM::S14,ARM::S15,
5873                                    ARM::S16,ARM::S17,ARM::S18,ARM::S19,
5874                                    ARM::S20,ARM::S21,ARM::S22,ARM::S23,
5875                                    ARM::S24,ARM::S25,ARM::S26,ARM::S27,
5876                                    ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
5877     if (VT.getSizeInBits() == 64)
5878       return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
5879                                    ARM::D4, ARM::D5, ARM::D6, ARM::D7,
5880                                    ARM::D8, ARM::D9, ARM::D10,ARM::D11,
5881                                    ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
5882     if (VT.getSizeInBits() == 128)
5883       return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
5884                                    ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
5885       break;
5886   }
5887
5888   return std::vector<unsigned>();
5889 }
5890
5891 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5892 /// vector.  If it is invalid, don't add anything to Ops.
5893 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5894                                                      char Constraint,
5895                                                      std::vector<SDValue>&Ops,
5896                                                      SelectionDAG &DAG) const {
5897   SDValue Result(0, 0);
5898
5899   switch (Constraint) {
5900   default: break;
5901   case 'I': case 'J': case 'K': case 'L':
5902   case 'M': case 'N': case 'O':
5903     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5904     if (!C)
5905       return;
5906
5907     int64_t CVal64 = C->getSExtValue();
5908     int CVal = (int) CVal64;
5909     // None of these constraints allow values larger than 32 bits.  Check
5910     // that the value fits in an int.
5911     if (CVal != CVal64)
5912       return;
5913
5914     switch (Constraint) {
5915       case 'I':
5916         if (Subtarget->isThumb1Only()) {
5917           // This must be a constant between 0 and 255, for ADD
5918           // immediates.
5919           if (CVal >= 0 && CVal <= 255)
5920             break;
5921         } else if (Subtarget->isThumb2()) {
5922           // A constant that can be used as an immediate value in a
5923           // data-processing instruction.
5924           if (ARM_AM::getT2SOImmVal(CVal) != -1)
5925             break;
5926         } else {
5927           // A constant that can be used as an immediate value in a
5928           // data-processing instruction.
5929           if (ARM_AM::getSOImmVal(CVal) != -1)
5930             break;
5931         }
5932         return;
5933
5934       case 'J':
5935         if (Subtarget->isThumb()) {  // FIXME thumb2
5936           // This must be a constant between -255 and -1, for negated ADD
5937           // immediates. This can be used in GCC with an "n" modifier that
5938           // prints the negated value, for use with SUB instructions. It is
5939           // not useful otherwise but is implemented for compatibility.
5940           if (CVal >= -255 && CVal <= -1)
5941             break;
5942         } else {
5943           // This must be a constant between -4095 and 4095. It is not clear
5944           // what this constraint is intended for. Implemented for
5945           // compatibility with GCC.
5946           if (CVal >= -4095 && CVal <= 4095)
5947             break;
5948         }
5949         return;
5950
5951       case 'K':
5952         if (Subtarget->isThumb1Only()) {
5953           // A 32-bit value where only one byte has a nonzero value. Exclude
5954           // zero to match GCC. This constraint is used by GCC internally for
5955           // constants that can be loaded with a move/shift combination.
5956           // It is not useful otherwise but is implemented for compatibility.
5957           if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
5958             break;
5959         } else if (Subtarget->isThumb2()) {
5960           // A constant whose bitwise inverse can be used as an immediate
5961           // value in a data-processing instruction. This can be used in GCC
5962           // with a "B" modifier that prints the inverted value, for use with
5963           // BIC and MVN instructions. It is not useful otherwise but is
5964           // implemented for compatibility.
5965           if (ARM_AM::getT2SOImmVal(~CVal) != -1)
5966             break;
5967         } else {
5968           // A constant whose bitwise inverse can be used as an immediate
5969           // value in a data-processing instruction. This can be used in GCC
5970           // with a "B" modifier that prints the inverted value, for use with
5971           // BIC and MVN instructions. It is not useful otherwise but is
5972           // implemented for compatibility.
5973           if (ARM_AM::getSOImmVal(~CVal) != -1)
5974             break;
5975         }
5976         return;
5977
5978       case 'L':
5979         if (Subtarget->isThumb1Only()) {
5980           // This must be a constant between -7 and 7,
5981           // for 3-operand ADD/SUB immediate instructions.
5982           if (CVal >= -7 && CVal < 7)
5983             break;
5984         } else if (Subtarget->isThumb2()) {
5985           // A constant whose negation can be used as an immediate value in a
5986           // data-processing instruction. This can be used in GCC with an "n"
5987           // modifier that prints the negated value, for use with SUB
5988           // instructions. It is not useful otherwise but is implemented for
5989           // compatibility.
5990           if (ARM_AM::getT2SOImmVal(-CVal) != -1)
5991             break;
5992         } else {
5993           // A constant whose negation can be used as an immediate value in a
5994           // data-processing instruction. This can be used in GCC with an "n"
5995           // modifier that prints the negated value, for use with SUB
5996           // instructions. It is not useful otherwise but is implemented for
5997           // compatibility.
5998           if (ARM_AM::getSOImmVal(-CVal) != -1)
5999             break;
6000         }
6001         return;
6002
6003       case 'M':
6004         if (Subtarget->isThumb()) { // FIXME thumb2
6005           // This must be a multiple of 4 between 0 and 1020, for
6006           // ADD sp + immediate.
6007           if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
6008             break;
6009         } else {
6010           // A power of two or a constant between 0 and 32.  This is used in
6011           // GCC for the shift amount on shifted register operands, but it is
6012           // useful in general for any shift amounts.
6013           if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
6014             break;
6015         }
6016         return;
6017
6018       case 'N':
6019         if (Subtarget->isThumb()) {  // FIXME thumb2
6020           // This must be a constant between 0 and 31, for shift amounts.
6021           if (CVal >= 0 && CVal <= 31)
6022             break;
6023         }
6024         return;
6025
6026       case 'O':
6027         if (Subtarget->isThumb()) {  // FIXME thumb2
6028           // This must be a multiple of 4 between -508 and 508, for
6029           // ADD/SUB sp = sp + immediate.
6030           if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
6031             break;
6032         }
6033         return;
6034     }
6035     Result = DAG.getTargetConstant(CVal, Op.getValueType());
6036     break;
6037   }
6038
6039   if (Result.getNode()) {
6040     Ops.push_back(Result);
6041     return;
6042   }
6043   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6044 }
6045
6046 bool
6047 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
6048   // The ARM target isn't yet aware of offsets.
6049   return false;
6050 }
6051
6052 int ARM::getVFPf32Imm(const APFloat &FPImm) {
6053   APInt Imm = FPImm.bitcastToAPInt();
6054   uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
6055   int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
6056   int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
6057
6058   // We can handle 4 bits of mantissa.
6059   // mantissa = (16+UInt(e:f:g:h))/16.
6060   if (Mantissa & 0x7ffff)
6061     return -1;
6062   Mantissa >>= 19;
6063   if ((Mantissa & 0xf) != Mantissa)
6064     return -1;
6065
6066   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
6067   if (Exp < -3 || Exp > 4)
6068     return -1;
6069   Exp = ((Exp+3) & 0x7) ^ 4;
6070
6071   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
6072 }
6073
6074 int ARM::getVFPf64Imm(const APFloat &FPImm) {
6075   APInt Imm = FPImm.bitcastToAPInt();
6076   uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
6077   int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
6078   uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
6079
6080   // We can handle 4 bits of mantissa.
6081   // mantissa = (16+UInt(e:f:g:h))/16.
6082   if (Mantissa & 0xffffffffffffLL)
6083     return -1;
6084   Mantissa >>= 48;
6085   if ((Mantissa & 0xf) != Mantissa)
6086     return -1;
6087
6088   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
6089   if (Exp < -3 || Exp > 4)
6090     return -1;
6091   Exp = ((Exp+3) & 0x7) ^ 4;
6092
6093   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
6094 }
6095
6096 bool ARM::isBitFieldInvertedMask(unsigned v) {
6097   if (v == 0xffffffff)
6098     return 0;
6099   // there can be 1's on either or both "outsides", all the "inside"
6100   // bits must be 0's
6101   unsigned int lsb = 0, msb = 31;
6102   while (v & (1 << msb)) --msb;
6103   while (v & (1 << lsb)) ++lsb;
6104   for (unsigned int i = lsb; i <= msb; ++i) {
6105     if (v & (1 << i))
6106       return 0;
6107   }
6108   return 1;
6109 }
6110
6111 /// isFPImmLegal - Returns true if the target can instruction select the
6112 /// specified FP immediate natively. If false, the legalizer will
6113 /// materialize the FP immediate as a load from a constant pool.
6114 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
6115   if (!Subtarget->hasVFP3())
6116     return false;
6117   if (VT == MVT::f32)
6118     return ARM::getVFPf32Imm(Imm) != -1;
6119   if (VT == MVT::f64)
6120     return ARM::getVFPf64Imm(Imm) != -1;
6121   return false;
6122 }
6123
6124 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
6125 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
6126 /// specified in the intrinsic calls.
6127 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
6128                                            const CallInst &I,
6129                                            unsigned Intrinsic) const {
6130   switch (Intrinsic) {
6131   case Intrinsic::arm_neon_vld1:
6132   case Intrinsic::arm_neon_vld2:
6133   case Intrinsic::arm_neon_vld3:
6134   case Intrinsic::arm_neon_vld4:
6135   case Intrinsic::arm_neon_vld2lane:
6136   case Intrinsic::arm_neon_vld3lane:
6137   case Intrinsic::arm_neon_vld4lane: {
6138     Info.opc = ISD::INTRINSIC_W_CHAIN;
6139     // Conservatively set memVT to the entire set of vectors loaded.
6140     uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
6141     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
6142     Info.ptrVal = I.getArgOperand(0);
6143     Info.offset = 0;
6144     Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
6145     Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
6146     Info.vol = false; // volatile loads with NEON intrinsics not supported
6147     Info.readMem = true;
6148     Info.writeMem = false;
6149     return true;
6150   }
6151   case Intrinsic::arm_neon_vst1:
6152   case Intrinsic::arm_neon_vst2:
6153   case Intrinsic::arm_neon_vst3:
6154   case Intrinsic::arm_neon_vst4:
6155   case Intrinsic::arm_neon_vst2lane:
6156   case Intrinsic::arm_neon_vst3lane:
6157   case Intrinsic::arm_neon_vst4lane: {
6158     Info.opc = ISD::INTRINSIC_VOID;
6159     // Conservatively set memVT to the entire set of vectors stored.
6160     unsigned NumElts = 0;
6161     for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
6162       const Type *ArgTy = I.getArgOperand(ArgI)->getType();
6163       if (!ArgTy->isVectorTy())
6164         break;
6165       NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
6166     }
6167     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
6168     Info.ptrVal = I.getArgOperand(0);
6169     Info.offset = 0;
6170     Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
6171     Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
6172     Info.vol = false; // volatile stores with NEON intrinsics not supported
6173     Info.readMem = false;
6174     Info.writeMem = true;
6175     return true;
6176   }
6177   default:
6178     break;
6179   }
6180
6181   return false;
6182 }