lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

   1 //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This implements routines for translating from LLVM IR into SelectionDAG IR.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "isel"
  15 #include "SDNodeDbgValue.h"
  16 #include "SelectionDAGBuilder.h"
  17 #include "llvm/ADT/BitVector.h"
  18 #include "llvm/ADT/SmallSet.h"
  19 #include "llvm/Analysis/AliasAnalysis.h"
  20 #include "llvm/Analysis/ConstantFolding.h"
  21 #include "llvm/Constants.h"
  22 #include "llvm/CallingConv.h"
  23 #include "llvm/DerivedTypes.h"
  24 #include "llvm/Function.h"
  25 #include "llvm/GlobalVariable.h"
  26 #include "llvm/InlineAsm.h"
  27 #include "llvm/Instructions.h"
  28 #include "llvm/Intrinsics.h"
  29 #include "llvm/IntrinsicInst.h"
  30 #include "llvm/LLVMContext.h"
  31 #include "llvm/Module.h"
  32 #include "llvm/CodeGen/Analysis.h"
  33 #include "llvm/CodeGen/FastISel.h"
  34 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  35 #include "llvm/CodeGen/GCStrategy.h"
  36 #include "llvm/CodeGen/GCMetadata.h"
  37 #include "llvm/CodeGen/MachineFunction.h"
  38 #include "llvm/CodeGen/MachineFrameInfo.h"
  39 #include "llvm/CodeGen/MachineInstrBuilder.h"
  40 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  41 #include "llvm/CodeGen/MachineModuleInfo.h"
  42 #include "llvm/CodeGen/MachineRegisterInfo.h"
  43 #include "llvm/CodeGen/PseudoSourceValue.h"
  44 #include "llvm/CodeGen/SelectionDAG.h"
  45 #include "llvm/Analysis/DebugInfo.h"
  46 #include "llvm/Target/TargetRegisterInfo.h"
  47 #include "llvm/Target/TargetData.h"
  48 #include "llvm/Target/TargetFrameInfo.h"
  49 #include "llvm/Target/TargetInstrInfo.h"
  50 #include "llvm/Target/TargetIntrinsicInfo.h"
  51 #include "llvm/Target/TargetLowering.h"
  52 #include "llvm/Target/TargetOptions.h"
  53 #include "llvm/Support/Compiler.h"
  54 #include "llvm/Support/CommandLine.h"
  55 #include "llvm/Support/Debug.h"
  56 #include "llvm/Support/ErrorHandling.h"
  57 #include "llvm/Support/MathExtras.h"
  58 #include "llvm/Support/raw_ostream.h"
  59 #include <algorithm>
  60 using namespace llvm;
  61
  62 /// LimitFloatPrecision - Generate low-precision inline sequences for
  63 /// some float libcalls (6, 8 or 12 bits).
  64 static unsigned LimitFloatPrecision;
  65
  66 static cl::opt<unsigned, true>
  67 LimitFPPrecision("limit-float-precision",
  68                  cl::desc("Generate low-precision inline sequences "
  69                           "for some float libcalls"),
  70                  cl::location(LimitFloatPrecision),
  71                  cl::init(0));
  72
  73 /// getCopyFromParts - Create a value that contains the specified legal parts
  74 /// combined into the value they represent.  If the parts combine to a type
  75 /// larger then ValueVT then AssertOp can be used to specify whether the extra
  76 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
  77 /// (ISD::AssertSext).
  78 static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
  79                                 const SDValue *Parts,
  80                                 unsigned NumParts, EVT PartVT, EVT ValueVT,
  81                                 ISD::NodeType AssertOp = ISD::DELETED_NODE) {
  82   assert(NumParts > 0 && "No parts to assemble!");
  83   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  84   SDValue Val = Parts[0];
  85
  86   if (NumParts > 1) {
  87     // Assemble the value from multiple parts.
  88     if (!ValueVT.isVector() && ValueVT.isInteger()) {
  89       unsigned PartBits = PartVT.getSizeInBits();
  90       unsigned ValueBits = ValueVT.getSizeInBits();
  91
  92       // Assemble the power of 2 part.
  93       unsigned RoundParts = NumParts & (NumParts - 1) ?
  94         1 << Log2_32(NumParts) : NumParts;
  95       unsigned RoundBits = PartBits * RoundParts;
  96       EVT RoundVT = RoundBits == ValueBits ?
  97         ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
  98       SDValue Lo, Hi;
  99
 100       EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
 101
 102       if (RoundParts > 2) {
 103         Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
 104                               PartVT, HalfVT);
 105         Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
 106                               RoundParts / 2, PartVT, HalfVT);
 107       } else {
 108         Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
 109         Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
 110       }
 111
 112       if (TLI.isBigEndian())
 113         std::swap(Lo, Hi);
 114
 115       Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
 116
 117       if (RoundParts < NumParts) {
 118         // Assemble the trailing non-power-of-2 part.
 119         unsigned OddParts = NumParts - RoundParts;
 120         EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
 121         Hi = getCopyFromParts(DAG, dl,
 122                               Parts + RoundParts, OddParts, PartVT, OddVT);
 123
 124         // Combine the round and odd parts.
 125         Lo = Val;
 126         if (TLI.isBigEndian())
 127           std::swap(Lo, Hi);
 128         EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
 129         Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
 130         Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
 131                          DAG.getConstant(Lo.getValueType().getSizeInBits(),
 132                                          TLI.getPointerTy()));
 133         Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
 134         Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
 135       }
 136     } else if (ValueVT.isVector()) {
 137       // Handle a multi-element vector.
 138       EVT IntermediateVT, RegisterVT;
 139       unsigned NumIntermediates;
 140       unsigned NumRegs =
 141         TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
 142                                    NumIntermediates, RegisterVT);
 143       assert(NumRegs == NumParts
 144              && "Part count doesn't match vector breakdown!");
 145       NumParts = NumRegs; // Silence a compiler warning.
 146       assert(RegisterVT == PartVT
 147              && "Part type doesn't match vector breakdown!");
 148       assert(RegisterVT == Parts[0].getValueType() &&
 149              "Part type doesn't match part!");
 150
 151       // Assemble the parts into intermediate operands.
 152       SmallVector<SDValue, 8> Ops(NumIntermediates);
 153       if (NumIntermediates == NumParts) {
 154         // If the register was not expanded, truncate or copy the value,
 155         // as appropriate.
 156         for (unsigned i = 0; i != NumParts; ++i)
 157           Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
 158                                     PartVT, IntermediateVT);
 159       } else if (NumParts > 0) {
 160         // If the intermediate type was expanded, build the intermediate
 161         // operands from the parts.
 162         assert(NumParts % NumIntermediates == 0 &&
 163                "Must expand into a divisible number of parts!");
 164         unsigned Factor = NumParts / NumIntermediates;
 165         for (unsigned i = 0; i != NumIntermediates; ++i)
 166           Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
 167                                     PartVT, IntermediateVT);
 168       }
 169
 170       // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
 171       // intermediate operands.
 172       Val = DAG.getNode(IntermediateVT.isVector() ?
 173                         ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
 174                         ValueVT, &Ops[0], NumIntermediates);
 175     } else if (PartVT.isFloatingPoint()) {
 176       // FP split into multiple FP parts (for ppcf128)
 177       assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
 178              "Unexpected split");
 179       SDValue Lo, Hi;
 180       Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
 181       Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
 182       if (TLI.isBigEndian())
 183         std::swap(Lo, Hi);
 184       Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
 185     } else {
 186       // FP split into integer parts (soft fp)
 187       assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
 188              !PartVT.isVector() && "Unexpected split");
 189       EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
 190       Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
 191     }
 192   }
 193
 194   // There is now one part, held in Val.  Correct it to match ValueVT.
 195   PartVT = Val.getValueType();
 196
 197   if (PartVT == ValueVT)
 198     return Val;
 199
 200   if (PartVT.isVector()) {
 201     assert(ValueVT.isVector() && "Unknown vector conversion!");
 202     return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
 203   }
 204
 205   if (ValueVT.isVector()) {
 206     assert(ValueVT.getVectorElementType() == PartVT &&
 207            ValueVT.getVectorNumElements() == 1 &&
 208            "Only trivial scalar-to-vector conversions should get here!");
 209     return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
 210   }
 211
 212   if (PartVT.isInteger() &&
 213       ValueVT.isInteger()) {
 214     if (ValueVT.bitsLT(PartVT)) {
 215       // For a truncate, see if we have any information to
 216       // indicate whether the truncated bits will always be
 217       // zero or sign-extension.
 218       if (AssertOp != ISD::DELETED_NODE)
 219         Val = DAG.getNode(AssertOp, dl, PartVT, Val,
 220                           DAG.getValueType(ValueVT));
 221       return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
 222     } else {
 223       return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
 224     }
 225   }
 226
 227   if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
 228     if (ValueVT.bitsLT(Val.getValueType())) {
 229       // FP_ROUND's are always exact here.
 230       return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
 231                          DAG.getIntPtrConstant(1));
 232     }
 233
 234     return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
 235   }
 236
 237   if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
 238     return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
 239
 240   llvm_unreachable("Unknown mismatch!");
 241   return SDValue();
 242 }
 243
 244 /// getCopyToParts - Create a series of nodes that contain the specified value
 245 /// split into legal parts.  If the parts contain more bits than Val, then, for
 246 /// integers, ExtendKind can be used to specify how to generate the extra bits.
 247 static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
 248                            SDValue Val, SDValue *Parts, unsigned NumParts,
 249                            EVT PartVT,
 250                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
 251   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 252   EVT PtrVT = TLI.getPointerTy();
 253   EVT ValueVT = Val.getValueType();
 254   unsigned PartBits = PartVT.getSizeInBits();
 255   unsigned OrigNumParts = NumParts;
 256   assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
 257
 258   if (!NumParts)
 259     return;
 260
 261   if (!ValueVT.isVector()) {
 262     if (PartVT == ValueVT) {
 263       assert(NumParts == 1 && "No-op copy with multiple parts!");
 264       Parts[0] = Val;
 265       return;
 266     }
 267
 268     if (NumParts * PartBits > ValueVT.getSizeInBits()) {
 269       // If the parts cover more bits than the value has, promote the value.
 270       if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
 271         assert(NumParts == 1 && "Do not know what to promote to!");
 272         Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
 273       } else if (PartVT.isInteger() && ValueVT.isInteger()) {
 274         ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
 275         Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
 276       } else {
 277         llvm_unreachable("Unknown mismatch!");
 278       }
 279     } else if (PartBits == ValueVT.getSizeInBits()) {
 280       // Different types of the same size.
 281       assert(NumParts == 1 && PartVT != ValueVT);
 282       Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
 283     } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
 284       // If the parts cover less bits than value has, truncate the value.
 285       if (PartVT.isInteger() && ValueVT.isInteger()) {
 286         ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
 287         Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
 288       } else {
 289         llvm_unreachable("Unknown mismatch!");
 290       }
 291     }
 292
 293     // The value may have changed - recompute ValueVT.
 294     ValueVT = Val.getValueType();
 295     assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
 296            "Failed to tile the value with PartVT!");
 297
 298     if (NumParts == 1) {
 299       assert(PartVT == ValueVT && "Type conversion failed!");
 300       Parts[0] = Val;
 301       return;
 302     }
 303
 304     // Expand the value into multiple parts.
 305     if (NumParts & (NumParts - 1)) {
 306       // The number of parts is not a power of 2.  Split off and copy the tail.
 307       assert(PartVT.isInteger() && ValueVT.isInteger() &&
 308              "Do not know what to expand to!");
 309       unsigned RoundParts = 1 << Log2_32(NumParts);
 310       unsigned RoundBits = RoundParts * PartBits;
 311       unsigned OddParts = NumParts - RoundParts;
 312       SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
 313                                    DAG.getConstant(RoundBits,
 314                                                    TLI.getPointerTy()));
 315       getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
 316                      OddParts, PartVT);
 317
 318       if (TLI.isBigEndian())
 319         // The odd parts were reversed by getCopyToParts - unreverse them.
 320         std::reverse(Parts + RoundParts, Parts + NumParts);
 321
 322       NumParts = RoundParts;
 323       ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
 324       Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
 325     }
 326
 327     // The number of parts is a power of 2.  Repeatedly bisect the value using
 328     // EXTRACT_ELEMENT.
 329     Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
 330                            EVT::getIntegerVT(*DAG.getContext(),
 331                                              ValueVT.getSizeInBits()),
 332                            Val);
 333
 334     for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
 335       for (unsigned i = 0; i < NumParts; i += StepSize) {
 336         unsigned ThisBits = StepSize * PartBits / 2;
 337         EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
 338         SDValue &Part0 = Parts[i];
 339         SDValue &Part1 = Parts[i+StepSize/2];
 340
 341         Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
 342                             ThisVT, Part0,
 343                             DAG.getConstant(1, PtrVT));
 344         Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
 345                             ThisVT, Part0,
 346                             DAG.getConstant(0, PtrVT));
 347
 348         if (ThisBits == PartBits && ThisVT != PartVT) {
 349           Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
 350                                                 PartVT, Part0);
 351           Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
 352                                                 PartVT, Part1);
 353         }
 354       }
 355     }
 356
 357     if (TLI.isBigEndian())
 358       std::reverse(Parts, Parts + OrigNumParts);
 359
 360     return;
 361   }
 362
 363   // Vector ValueVT.
 364   if (NumParts == 1) {
 365     if (PartVT != ValueVT) {
 366       if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
 367         Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
 368       } else {
 369         assert(ValueVT.getVectorElementType() == PartVT &&
 370                ValueVT.getVectorNumElements() == 1 &&
 371                "Only trivial vector-to-scalar conversions should get here!");
 372         Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
 373                           PartVT, Val,
 374                           DAG.getConstant(0, PtrVT));
 375       }
 376     }
 377
 378     Parts[0] = Val;
 379     return;
 380   }
 381
 382   // Handle a multi-element vector.
 383   EVT IntermediateVT, RegisterVT;
 384   unsigned NumIntermediates;
 385   unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
 386                               IntermediateVT, NumIntermediates, RegisterVT);
 387   unsigned NumElements = ValueVT.getVectorNumElements();
 388
 389   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
 390   NumParts = NumRegs; // Silence a compiler warning.
 391   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
 392
 393   // Split the vector into intermediate operands.
 394   SmallVector<SDValue, 8> Ops(NumIntermediates);
 395   for (unsigned i = 0; i != NumIntermediates; ++i) {
 396     if (IntermediateVT.isVector())
 397       Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
 398                            IntermediateVT, Val,
 399                            DAG.getConstant(i * (NumElements / NumIntermediates),
 400                                            PtrVT));
 401     else
 402       Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
 403                            IntermediateVT, Val,
 404                            DAG.getConstant(i, PtrVT));
 405   }
 406
 407   // Split the intermediate operands into legal parts.
 408   if (NumParts == NumIntermediates) {
 409     // If the register was not expanded, promote or copy the value,
 410     // as appropriate.
 411     for (unsigned i = 0; i != NumParts; ++i)
 412       getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
 413   } else if (NumParts > 0) {
 414     // If the intermediate type was expanded, split each the value into
 415     // legal parts.
 416     assert(NumParts % NumIntermediates == 0 &&
 417            "Must expand into a divisible number of parts!");
 418     unsigned Factor = NumParts / NumIntermediates;
 419     for (unsigned i = 0; i != NumIntermediates; ++i)
 420       getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
 421   }
 422 }
 423
 424 namespace {
 425   /// RegsForValue - This struct represents the registers (physical or virtual)
 426   /// that a particular set of values is assigned, and the type information
 427   /// about the value. The most common situation is to represent one value at a
 428   /// time, but struct or array values are handled element-wise as multiple
 429   /// values.  The splitting of aggregates is performed recursively, so that we
 430   /// never have aggregate-typed registers. The values at this point do not
 431   /// necessarily have legal types, so each value may require one or more
 432   /// registers of some legal type.
 433   ///
 434   struct RegsForValue {
 435     /// ValueVTs - The value types of the values, which may not be legal, and
 436     /// may need be promoted or synthesized from one or more registers.
 437     ///
 438     SmallVector<EVT, 4> ValueVTs;
 439
 440     /// RegVTs - The value types of the registers. This is the same size as
 441     /// ValueVTs and it records, for each value, what the type of the assigned
 442     /// register or registers are. (Individual values are never synthesized
 443     /// from more than one type of register.)
 444     ///
 445     /// With virtual registers, the contents of RegVTs is redundant with TLI's
 446     /// getRegisterType member function, however when with physical registers
 447     /// it is necessary to have a separate record of the types.
 448     ///
 449     SmallVector<EVT, 4> RegVTs;
 450
 451     /// Regs - This list holds the registers assigned to the values.
 452     /// Each legal or promoted value requires one register, and each
 453     /// expanded value requires multiple registers.
 454     ///
 455     SmallVector<unsigned, 4> Regs;
 456
 457     RegsForValue() {}
 458
 459     RegsForValue(const SmallVector<unsigned, 4> &regs,
 460                  EVT regvt, EVT valuevt)
 461       : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
 462
 463     RegsForValue(const SmallVector<unsigned, 4> &regs,
 464                  const SmallVector<EVT, 4> &regvts,
 465                  const SmallVector<EVT, 4> &valuevts)
 466       : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
 467
 468     RegsForValue(LLVMContext &Context, const TargetLowering &tli,
 469                  unsigned Reg, const Type *Ty) {
 470       ComputeValueVTs(tli, Ty, ValueVTs);
 471
 472       for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
 473         EVT ValueVT = ValueVTs[Value];
 474         unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
 475         EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
 476         for (unsigned i = 0; i != NumRegs; ++i)
 477           Regs.push_back(Reg + i);
 478         RegVTs.push_back(RegisterVT);
 479         Reg += NumRegs;
 480       }
 481     }
 482
 483     /// areValueTypesLegal - Return true if types of all the values are legal.
 484     bool areValueTypesLegal(const TargetLowering &TLI) {
 485       for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
 486         EVT RegisterVT = RegVTs[Value];
 487         if (!TLI.isTypeLegal(RegisterVT))
 488           return false;
 489       }
 490       return true;
 491     }
 492
 493     /// append - Add the specified values to this one.
 494     void append(const RegsForValue &RHS) {
 495       ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
 496       RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
 497       Regs.append(RHS.Regs.begin(), RHS.Regs.end());
 498     }
 499
 500     /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
 501     /// this value and returns the result as a ValueVTs value.  This uses
 502     /// Chain/Flag as the input and updates them for the output Chain/Flag.
 503     /// If the Flag pointer is NULL, no flag is used.
 504     SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
 505                             DebugLoc dl,
 506                             SDValue &Chain, SDValue *Flag) const;
 507
 508     /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
 509     /// specified value into the registers specified by this object.  This uses
 510     /// Chain/Flag as the input and updates them for the output Chain/Flag.
 511     /// If the Flag pointer is NULL, no flag is used.
 512     void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
 513                        SDValue &Chain, SDValue *Flag) const;
 514
 515     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
 516     /// operand list.  This adds the code marker, matching input operand index
 517     /// (if applicable), and includes the number of values added into it.
 518     void AddInlineAsmOperands(unsigned Kind,
 519                               bool HasMatching, unsigned MatchingIdx,
 520                               SelectionDAG &DAG,
 521                               std::vector<SDValue> &Ops) const;
 522   };
 523 }
 524
 525 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
 526 /// this value and returns the result as a ValueVT value.  This uses
 527 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 528 /// If the Flag pointer is NULL, no flag is used.
 529 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
 530                                       FunctionLoweringInfo &FuncInfo,
 531                                       DebugLoc dl,
 532                                       SDValue &Chain, SDValue *Flag) const {
 533   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 534
 535   // Assemble the legal parts into the final values.
 536   SmallVector<SDValue, 4> Values(ValueVTs.size());
 537   SmallVector<SDValue, 8> Parts;
 538   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
 539     // Copy the legal parts from the registers.
 540     EVT ValueVT = ValueVTs[Value];
 541     unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
 542     EVT RegisterVT = RegVTs[Value];
 543
 544     Parts.resize(NumRegs);
 545     for (unsigned i = 0; i != NumRegs; ++i) {
 546       SDValue P;
 547       if (Flag == 0) {
 548         P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
 549       } else {
 550         P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
 551         *Flag = P.getValue(2);
 552       }
 553
 554       Chain = P.getValue(1);
 555
 556       // If the source register was virtual and if we know something about it,
 557       // add an assert node.
 558       if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
 559           RegisterVT.isInteger() && !RegisterVT.isVector()) {
 560         unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
 561         if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
 562           const FunctionLoweringInfo::LiveOutInfo &LOI =
 563             FuncInfo.LiveOutRegInfo[SlotNo];
 564
 565           unsigned RegSize = RegisterVT.getSizeInBits();
 566           unsigned NumSignBits = LOI.NumSignBits;
 567           unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
 568
 569           // FIXME: We capture more information than the dag can represent.  For
 570           // now, just use the tightest assertzext/assertsext possible.
 571           bool isSExt = true;
 572           EVT FromVT(MVT::Other);
 573           if (NumSignBits == RegSize)
 574             isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
 575           else if (NumZeroBits >= RegSize-1)
 576             isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
 577           else if (NumSignBits > RegSize-8)
 578             isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
 579           else if (NumZeroBits >= RegSize-8)
 580             isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
 581           else if (NumSignBits > RegSize-16)
 582             isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
 583           else if (NumZeroBits >= RegSize-16)
 584             isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
 585           else if (NumSignBits > RegSize-32)
 586             isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
 587           else if (NumZeroBits >= RegSize-32)
 588             isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
 589
 590           if (FromVT != MVT::Other)
 591             P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
 592                             RegisterVT, P, DAG.getValueType(FromVT));
 593         }
 594       }
 595
 596       Parts[i] = P;
 597     }
 598
 599     Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
 600                                      NumRegs, RegisterVT, ValueVT);
 601     Part += NumRegs;
 602     Parts.clear();
 603   }
 604
 605   return DAG.getNode(ISD::MERGE_VALUES, dl,
 606                      DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
 607                      &Values[0], ValueVTs.size());
 608 }
 609
 610 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
 611 /// specified value into the registers specified by this object.  This uses
 612 /// Chain/Flag as the input and updates them for the output Chain/Flag.
 613 /// If the Flag pointer is NULL, no flag is used.
 614 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
 615                                  SDValue &Chain, SDValue *Flag) const {
 616   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 617
 618   // Get the list of the values's legal parts.
 619   unsigned NumRegs = Regs.size();
 620   SmallVector<SDValue, 8> Parts(NumRegs);
 621   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
 622     EVT ValueVT = ValueVTs[Value];
 623     unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
 624     EVT RegisterVT = RegVTs[Value];
 625
 626     getCopyToParts(DAG, dl,
 627                    Val.getValue(Val.getResNo() + Value),
 628                    &Parts[Part], NumParts, RegisterVT);
 629     Part += NumParts;
 630   }
 631
 632   // Copy the parts into the registers.
 633   SmallVector<SDValue, 8> Chains(NumRegs);
 634   for (unsigned i = 0; i != NumRegs; ++i) {
 635     SDValue Part;
 636     if (Flag == 0) {
 637       Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
 638     } else {
 639       Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
 640       *Flag = Part.getValue(1);
 641     }
 642
 643     Chains[i] = Part.getValue(0);
 644   }
 645
 646   if (NumRegs == 1 || Flag)
 647     // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
 648     // flagged to it. That is the CopyToReg nodes and the user are considered
 649     // a single scheduling unit. If we create a TokenFactor and return it as
 650     // chain, then the TokenFactor is both a predecessor (operand) of the
 651     // user as well as a successor (the TF operands are flagged to the user).
 652     // c1, f1 = CopyToReg
 653     // c2, f2 = CopyToReg
 654     // c3     = TokenFactor c1, c2
 655     // ...
 656     //        = op c3, ..., f2
 657     Chain = Chains[NumRegs-1];
 658   else
 659     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
 660 }
 661
 662 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
 663 /// operand list.  This adds the code marker and includes the number of
 664 /// values added into it.
 665 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
 666                                         unsigned MatchingIdx,
 667                                         SelectionDAG &DAG,
 668                                         std::vector<SDValue> &Ops) const {
 669   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 670
 671   unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
 672   if (HasMatching)
 673     Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
 674   SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
 675   Ops.push_back(Res);
 676
 677   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
 678     unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
 679     EVT RegisterVT = RegVTs[Value];
 680     for (unsigned i = 0; i != NumRegs; ++i) {
 681       assert(Reg < Regs.size() && "Mismatch in # registers expected");
 682       Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
 683     }
 684   }
 685 }
 686
 687 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
 688   AA = &aa;
 689   GFI = gfi;
 690   TD = DAG.getTarget().getTargetData();
 691 }
 692
 693 /// clear - Clear out the current SelectionDAG and the associated
 694 /// state and prepare this SelectionDAGBuilder object to be used
 695 /// for a new block. This doesn't clear out information about
 696 /// additional blocks that are needed to complete switch lowering
 697 /// or PHI node updating; that information is cleared out as it is
 698 /// consumed.
 699 void SelectionDAGBuilder::clear() {
 700   NodeMap.clear();
 701   UnusedArgNodeMap.clear();
 702   PendingLoads.clear();
 703   PendingExports.clear();
 704   DanglingDebugInfoMap.clear();
 705   CurDebugLoc = DebugLoc();
 706   HasTailCall = false;
 707 }
 708
 709 /// getRoot - Return the current virtual root of the Selection DAG,
 710 /// flushing any PendingLoad items. This must be done before emitting
 711 /// a store or any other node that may need to be ordered after any
 712 /// prior load instructions.
 713 ///
 714 SDValue SelectionDAGBuilder::getRoot() {
 715   if (PendingLoads.empty())
 716     return DAG.getRoot();
 717
 718   if (PendingLoads.size() == 1) {
 719     SDValue Root = PendingLoads[0];
 720     DAG.setRoot(Root);
 721     PendingLoads.clear();
 722     return Root;
 723   }
 724
 725   // Otherwise, we have to make a token factor node.
 726   SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
 727                                &PendingLoads[0], PendingLoads.size());
 728   PendingLoads.clear();
 729   DAG.setRoot(Root);
 730   return Root;
 731 }
 732
 733 /// getControlRoot - Similar to getRoot, but instead of flushing all the
 734 /// PendingLoad items, flush all the PendingExports items. It is necessary
 735 /// to do this before emitting a terminator instruction.
 736 ///
 737 SDValue SelectionDAGBuilder::getControlRoot() {
 738   SDValue Root = DAG.getRoot();
 739
 740   if (PendingExports.empty())
 741     return Root;
 742
 743   // Turn all of the CopyToReg chains into one factored node.
 744   if (Root.getOpcode() != ISD::EntryToken) {
 745     unsigned i = 0, e = PendingExports.size();
 746     for (; i != e; ++i) {
 747       assert(PendingExports[i].getNode()->getNumOperands() > 1);
 748       if (PendingExports[i].getNode()->getOperand(0) == Root)
 749         break;  // Don't add the root if we already indirectly depend on it.
 750     }
 751
 752     if (i == e)
 753       PendingExports.push_back(Root);
 754   }
 755
 756   Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
 757                      &PendingExports[0],
 758                      PendingExports.size());
 759   PendingExports.clear();
 760   DAG.setRoot(Root);
 761   return Root;
 762 }
 763
 764 void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
 765   if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
 766   DAG.AssignOrdering(Node, SDNodeOrder);
 767
 768   for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
 769     AssignOrderingToNode(Node->getOperand(I).getNode());
 770 }
 771
 772 void SelectionDAGBuilder::visit(const Instruction &I) {
 773   // Set up outgoing PHI node register values before emitting the terminator.
 774   if (isa<TerminatorInst>(&I))
 775     HandlePHINodesInSuccessorBlocks(I.getParent());
 776
 777   CurDebugLoc = I.getDebugLoc();
 778
 779   visit(I.getOpcode(), I);
 780
 781   if (!isa<TerminatorInst>(&I) && !HasTailCall)
 782     CopyToExportRegsIfNeeded(&I);
 783
 784   CurDebugLoc = DebugLoc();
 785 }
 786
/// visitPHI - PHI nodes must never reach the opcode dispatcher; being called
/// at all is treated as an internal error.
void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}
 790
/// visit - Dispatch on \p Opcode to the matching visit<Opcode> method,
/// passing \p I cast to that opcode's class.  If the visitor recorded a node
/// for \p I in NodeMap, SDNodeOrder is advanced and the new ordering is
/// assigned to that node and its not-yet-ordered operands.
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file: each
    // HANDLE_INST entry expands to one 'case' that forwards to visit<OPCODE>.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
#include "llvm/Instruction.def"
  }

  // Assign the ordering to the freshly created DAG nodes.  Only done when the
  // visitor actually registered a value for I.
  if (NodeMap.count(&I)) {
    ++SDNodeOrder;
    AssignOrderingToNode(getValue(&I).getNode());
  }
}
 808
 809 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
 810 // generate the debug data structures now that we've seen its definition.
 811 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
 812                                                    SDValue Val) {
 813   DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
 814   if (DDI.getDI()) {
 815     const DbgValueInst *DI = DDI.getDI();
 816     DebugLoc dl = DDI.getdl();
 817     unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
 818     MDNode *Variable = DI->getVariable();
 819     uint64_t Offset = DI->getOffset();
 820     SDDbgValue *SDV;
 821     if (Val.getNode()) {
 822       if (!EmitFuncArgumentDbgValue(*DI, V, Variable, Offset, Val)) {
 823         SDV = DAG.getDbgValue(Variable, Val.getNode(),
 824                               Val.getResNo(), Offset, dl, DbgSDNodeOrder);
 825         DAG.AddDbgValue(SDV, Val.getNode(), false);
 826       }
 827     } else {
 828       SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
 829                             Offset, dl, SDNodeOrder);
 830       DAG.AddDbgValue(SDV, 0, false);
 831     }
 832     DanglingDebugInfoMap[V] = DanglingDebugInfo();
 833   }
 834 }
 835
 836 // getValue - Return an SDValue for the given Value.
 837 SDValue SelectionDAGBuilder::getValue(const Value *V) {
 838   // If we already have an SDValue for this value, use it. It's important
 839   // to do this first, so that we don't create a CopyFromReg if we already
 840   // have a regular SDValue.
 841   SDValue &N = NodeMap[V];
 842   if (N.getNode()) return N;
 843
 844   // If there's a virtual register allocated and initialized for this
 845   // value, use it.
 846   DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
 847   if (It != FuncInfo.ValueMap.end()) {
 848     unsigned InReg = It->second;
 849     RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
 850     SDValue Chain = DAG.getEntryNode();
 851     return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
 852   }
 853
 854   // Otherwise create a new SDValue and remember it.
 855   SDValue Val = getValueImpl(V);
 856   NodeMap[V] = Val;
 857   resolveDanglingDebugInfo(V, Val);
 858   return Val;
 859 }
 860
 861 /// getNonRegisterValue - Return an SDValue for the given Value, but
 862 /// don't look in FuncInfo.ValueMap for a virtual register.
 863 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
 864   // If we already have an SDValue for this value, use it.
 865   SDValue &N = NodeMap[V];
 866   if (N.getNode()) return N;
 867
 868   // Otherwise create a new SDValue and remember it.
 869   SDValue Val = getValueImpl(V);
 870   NodeMap[V] = Val;
 871   resolveDanglingDebugInfo(V, Val);
 872   return Val;
 873 }
 874
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
///
/// Constants are lowered by kind (integer, global, null pointer, FP, undef,
/// constant expression, struct/array aggregate, block address, vector).
/// Static allocas become frame indices.  Any other instruction is read back
/// from a virtual register initialized for it.  An empty aggregate yields a
/// null SDValue.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(V->getType(), true);

    // Scalar constants map directly onto a single DAG node.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);

    // A null pointer is the constant 0 of the target's pointer type.
    if (isa<ConstantPointerNull>(C))
      return DAG.getConstant(0, TLI.getPointerTy());

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, VT);

    // Aggregate undefs are handled element-wise further down.
    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    // Constant expressions go through the regular opcode visitor, which is
    // expected to record its result in NodeMap.
    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      // NOTE(review): if every operand was skipped above, Constants is empty
      // and &Constants[0] indexes an empty vector -- confirm that such a
      // constant cannot reach this point.
      return DAG.getMergeValues(&Constants[0], Constants.size(),
                                getCurDebugLoc());
    }

    // Remaining struct/array constants are all-zero or undef; build one
    // zero/undef node per flattened element.
    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, EltVT);
        else
          Constants[i] = DAG.getConstant(0, EltVT);
      }

      return DAG.getMergeValues(&Constants[0], NumElts,
                                getCurDebugLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    // Every other constant kind was handled above, so this must be a vector;
    // the cast asserts otherwise.
    const VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CP->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT = TLI.getValueType(VecTy->getElementType());

      // All-zero vector: replicate one zero of the element type.
      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, EltVT);
      else
        Op = DAG.getConstant(0, EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node and record it in NodeMap right away.
    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
                                    VT, &Ops[0], Ops.size());
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
  }

  // If this is an instruction which fast-isel has deferred, select it now:
  // initialize a virtual register for it and read the value back from that
  // register, chained from the entry node.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
    RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
  }

  llvm_unreachable("Can't get register for value!");
  return SDValue();
}
 993
 994 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 995   SDValue Chain = getControlRoot();
 996   SmallVector<ISD::OutputArg, 8> Outs;
 997   SmallVector<SDValue, 8> OutVals;
 998
 999   if (!FuncInfo.CanLowerReturn) {
1000     unsigned DemoteReg = FuncInfo.DemoteRegister;
1001     const Function *F = I.getParent()->getParent();
1002
1003     // Emit a store of the return value through the virtual register.
1004     // Leave Outs empty so that LowerReturn won't try to load return
1005     // registers the usual way.
1006     SmallVector<EVT, 1> PtrValueVTs;
1007     ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
1008                     PtrValueVTs);
1009
1010     SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
1011     SDValue RetOp = getValue(I.getOperand(0));
1012
1013     SmallVector<EVT, 4> ValueVTs;
1014     SmallVector<uint64_t, 4> Offsets;
1015     ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1016     unsigned NumValues = ValueVTs.size();
1017
1018     SmallVector<SDValue, 4> Chains(NumValues);
1019     EVT PtrVT = PtrValueVTs[0];
1020     for (unsigned i = 0; i != NumValues; ++i) {
1021       SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
1022                                 DAG.getConstant(Offsets[i], PtrVT));
1023       Chains[i] =
1024         DAG.getStore(Chain, getCurDebugLoc(),
1025                      SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1026                      Add, NULL, Offsets[i], false, false, 0);
1027     }
1028
1029     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
1030                         MVT::Other, &Chains[0], NumValues);
1031   } else if (I.getNumOperands() != 0) {
1032     SmallVector<EVT, 4> ValueVTs;
1033     ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
1034     unsigned NumValues = ValueVTs.size();
1035     if (NumValues) {
1036       SDValue RetOp = getValue(I.getOperand(0));
1037       for (unsigned j = 0, f = NumValues; j != f; ++j) {
1038         EVT VT = ValueVTs[j];
1039
1040         ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1041
1042         const Function *F = I.getParent()->getParent();
1043         if (F->paramHasAttr(0, Attribute::SExt))
1044           ExtendKind = ISD::SIGN_EXTEND;
1045         else if (F->paramHasAttr(0, Attribute::ZExt))
1046           ExtendKind = ISD::ZERO_EXTEND;
1047
1048         // FIXME: C calling convention requires the return type to be promoted
1049         // to at least 32-bit. But this is not necessary for non-C calling
1050         // conventions. The frontend should mark functions whose return values
1051         // require promoting with signext or zeroext attributes.
1052         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
1053           EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
1054           if (VT.bitsLT(MinVT))
1055             VT = MinVT;
1056         }
1057
1058         unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
1059         EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
1060         SmallVector<SDValue, 4> Parts(NumParts);
1061         getCopyToParts(DAG, getCurDebugLoc(),
1062                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1063                        &Parts[0], NumParts, PartVT, ExtendKind);
1064
1065         // 'inreg' on function refers to return value
1066         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1067         if (F->paramHasAttr(0, Attribute::InReg))
1068           Flags.setInReg();
1069
1070         // Propagate extension type if any
1071         if (F->paramHasAttr(0, Attribute::SExt))
1072           Flags.setSExt();
1073         else if (F->paramHasAttr(0, Attribute::ZExt))
1074           Flags.setZExt();
1075
1076         for (unsigned i = 0; i < NumParts; ++i) {
1077           Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1078                                         /*isfixed=*/true));
1079           OutVals.push_back(Parts[i]);
1080         }
1081       }
1082     }
1083   }
1084
1085   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1086   CallingConv::ID CallConv =
1087     DAG.getMachineFunction().getFunction()->getCallingConv();
1088   Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
1089                           Outs, OutVals, getCurDebugLoc(), DAG);
1090
1091   // Verify that the target's LowerReturn behaved as expected.
1092   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1093          "LowerReturn didn't return a valid chain!");
1094
1095   // Update the DAG with the new chain value resulting from return lowering.
1096   DAG.setRoot(Chain);
1097 }
1098
1099 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1100 /// created for it, emit nodes to copy the value into the virtual
1101 /// registers.
1102 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1103   DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1104   if (VMI != FuncInfo.ValueMap.end()) {
1105     assert(!V->use_empty() && "Unused value assigned virtual registers!");
1106     CopyValueToVirtualRegister(V, VMI->second);
1107   }
1108 }
1109
1110 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1111 /// the current basic block, add it to ValueMap now so that we'll get a
1112 /// CopyTo/FromReg.
1113 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1114   // No need to export constants.
1115   if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1116
1117   // Already exported?
1118   if (FuncInfo.isExportedInst(V)) return;
1119
1120   unsigned Reg = FuncInfo.InitializeRegForValue(V);
1121   CopyValueToVirtualRegister(V, Reg);
1122 }
1123
1124 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1125                                                      const BasicBlock *FromBB) {
1126   // The operands of the setcc have to be in this block.  We don't know
1127   // how to export them from some other block.
1128   if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1129     // Can export from current BB.
1130     if (VI->getParent() == FromBB)
1131       return true;
1132
1133     // Is already exported, noop.
1134     return FuncInfo.isExportedInst(V);
1135   }
1136
1137   // If this is an argument, we can export it if the BB is the entry block or
1138   // if it is already exported.
1139   if (isa<Argument>(V)) {
1140     if (FromBB == &FromBB->getParent()->getEntryBlock())
1141       return true;
1142
1143     // Otherwise, can only export this if it is already exported.
1144     return FuncInfo.isExportedInst(V);
1145   }
1146
1147   // Otherwise, constants can always be exported.
1148   return true;
1149 }
1150
1151 static bool InBlock(const Value *V, const BasicBlock *BB) {
1152   if (const Instruction *I = dyn_cast<Instruction>(V))
1153     return I->getParent() == BB;
1154   return true;
1155 }
1156
1157 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1158 /// This function emits a branch and is used at the leaves of an OR or an
1159 /// AND operator tree.
1160 ///
1161 void
1162 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1163                                                   MachineBasicBlock *TBB,
1164                                                   MachineBasicBlock *FBB,
1165                                                   MachineBasicBlock *CurBB,
1166                                                   MachineBasicBlock *SwitchBB) {
1167   const BasicBlock *BB = CurBB->getBasicBlock();
1168
1169   // If the leaf of the tree is a comparison, merge the condition into
1170   // the caseblock.
1171   if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1172     // The operands of the cmp have to be in this block.  We don't know
1173     // how to export them from some other block.  If this is the first block
1174     // of the sequence, no exporting is needed.
1175     if (CurBB == SwitchBB ||
1176         (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1177          isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1178       ISD::CondCode Condition;
1179       if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1180         Condition = getICmpCondCode(IC->getPredicate());
1181       } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
1182         Condition = getFCmpCondCode(FC->getPredicate());
1183       } else {
1184         Condition = ISD::SETEQ; // silence warning.
1185         llvm_unreachable("Unknown compare instruction");
1186       }
1187
1188       CaseBlock CB(Condition, BOp->getOperand(0),
1189                    BOp->getOperand(1), NULL, TBB, FBB, CurBB);
1190       SwitchCases.push_back(CB);
1191       return;
1192     }
1193   }
1194
1195   // Create a CaseBlock record representing this branch.
1196   CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
1197                NULL, TBB, FBB, CurBB);
1198   SwitchCases.push_back(CB);
1199 }
1200
1201 /// FindMergedConditions - If Cond is an expression like
1202 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1203                                                MachineBasicBlock *TBB,
1204                                                MachineBasicBlock *FBB,
1205                                                MachineBasicBlock *CurBB,
1206                                                MachineBasicBlock *SwitchBB,
1207                                                unsigned Opc) {
1208   // If this node is not part of the or/and tree, emit it as a branch.
1209   const Instruction *BOp = dyn_cast<Instruction>(Cond);
1210   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1211       (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
1212       BOp->getParent() != CurBB->getBasicBlock() ||
1213       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1214       !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1215     EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
1216     return;
1217   }
1218
1219   //  Create TmpBB after CurBB.
1220   MachineFunction::iterator BBI = CurBB;
1221   MachineFunction &MF = DAG.getMachineFunction();
1222   MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1223   CurBB->getParent()->insert(++BBI, TmpBB);
1224
1225   if (Opc == Instruction::Or) {
1226     // Codegen X | Y as:
1227     //   jmp_if_X TBB
1228     //   jmp TmpBB
1229     // TmpBB:
1230     //   jmp_if_Y TBB
1231     //   jmp FBB
1232     //
1233
1234     // Emit the LHS condition.
1235     FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
1236
1237     // Emit the RHS condition into TmpBB.
1238     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
1239   } else {
1240     assert(Opc == Instruction::And && "Unknown merge op!");
1241     // Codegen X & Y as:
1242     //   jmp_if_X TmpBB
1243     //   jmp FBB
1244     // TmpBB:
1245     //   jmp_if_Y TBB
1246     //   jmp FBB
1247     //
1248     //  This requires creation of TmpBB after CurBB.
1249
1250     // Emit the LHS condition.
1251     FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
1252
1253     // Emit the RHS condition into TmpBB.
1254     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
1255   }
1256 }
1257
1258 /// If the set of cases should be emitted as a series of branches, return true.
1259 /// If we should emit this as a bunch of and/or'd together conditions, return
1260 /// false.
1261 bool
1262 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
1263   if (Cases.size() != 2) return true;
1264
1265   // If this is two comparisons of the same values or'd or and'd together, they
1266   // will get folded into a single comparison, so don't emit two blocks.
1267   if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1268        Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1269       (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1270        Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1271     return false;
1272   }
1273
1274   // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1275   // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1276   if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1277       Cases[0].CC == Cases[1].CC &&
1278       isa<Constant>(Cases[0].CmpRHS) &&
1279       cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1280     if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1281       return false;
1282     if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1283       return false;
1284   }
1285
1286   return true;
1287 }
1288
1289 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1290   MachineBasicBlock *BrMBB = FuncInfo.MBB;
1291
1292   // Update machine-CFG edges.
1293   MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1294
1295   // Figure out which block is immediately after the current one.
1296   MachineBasicBlock *NextBlock = 0;
1297   MachineFunction::iterator BBI = BrMBB;
1298   if (++BBI != FuncInfo.MF->end())
1299     NextBlock = BBI;
1300
1301   if (I.isUnconditional()) {
1302     // Update machine-CFG edges.
1303     BrMBB->addSuccessor(Succ0MBB);
1304
1305     // If this is not a fall-through branch, emit the branch.
1306     if (Succ0MBB != NextBlock)
1307       DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1308                               MVT::Other, getControlRoot(),
1309                               DAG.getBasicBlock(Succ0MBB)));
1310
1311     return;
1312   }
1313
1314   // If this condition is one of the special cases we handle, do special stuff
1315   // now.
1316   const Value *CondVal = I.getCondition();
1317   MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1318
1319   // If this is a series of conditions that are or'd or and'd together, emit
1320   // this as a sequence of branches instead of setcc's with and/or operations.
1321   // For example, instead of something like:
1322   //     cmp A, B
1323   //     C = seteq
1324   //     cmp D, E
1325   //     F = setle
1326   //     or C, F
1327   //     jnz foo
1328   // Emit:
1329   //     cmp A, B
1330   //     je foo
1331   //     cmp D, E
1332   //     jle foo
1333   //
1334   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1335     if (BOp->hasOneUse() &&
1336         (BOp->getOpcode() == Instruction::And ||
1337          BOp->getOpcode() == Instruction::Or)) {
1338       FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1339                            BOp->getOpcode());
1340       // If the compares in later blocks need to use values not currently
1341       // exported from this block, export them now.  This block should always
1342       // be the first entry.
1343       assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1344
1345       // Allow some cases to be rejected.
1346       if (ShouldEmitAsBranches(SwitchCases)) {
1347         for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1348           ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1349           ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1350         }
1351
1352         // Emit the branch for this block.
1353         visitSwitchCase(SwitchCases[0], BrMBB);
1354         SwitchCases.erase(SwitchCases.begin());
1355         return;
1356       }
1357
1358       // Okay, we decided not to do this, remove any inserted MBB's and clear
1359       // SwitchCases.
1360       for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1361         FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1362
1363       SwitchCases.clear();
1364     }
1365   }
1366
1367   // Create a CaseBlock record representing this branch.
1368   CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1369                NULL, Succ0MBB, Succ1MBB, BrMBB);
1370
1371   // Use visitSwitchCase to actually insert the fast branch sequence for this
1372   // cond branch.
1373   visitSwitchCase(CB, BrMBB);
1374 }
1375
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
///
/// Builds the condition described by \p CB, adds CB.TrueBB and CB.FalseBB as
/// successors of \p SwitchBB, and sets the DAG root to a BRCOND to the true
/// block followed, unless the false block is the next block in layout, by a
/// BR to the false block.  \p CB's TrueBB/FalseBB may be swapped in place.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                          MachineBasicBlock *SwitchBB) {
  SDValue Cond;
  SDValue CondLHS = getValue(CB.CmpLHS);
  DebugLoc dl = getCurDebugLoc();

  // Build the setcc now.  A null CmpMHS means a plain two-operand compare;
  // otherwise this is a range check on CmpMHS.
  if (CB.CmpMHS == NULL) {
    // Fold "(X == true)" to X and "(X == false)" to !X to
    // handle common cases produced by branch lowering.
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
        CB.CC == ISD::SETEQ)
      Cond = CondLHS;
    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
             CB.CC == ISD::SETEQ) {
      // !X is emitted as X xor 1.
      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
    } else
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
  } else {
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

    // Here CmpLHS and CmpRHS are the constant bounds of the range.
    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();

    SDValue CmpOp = getValue(CB.CmpMHS);
    EVT VT = CmpOp.getValueType();

    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      // The lower bound is the signed minimum, so only the upper bound needs
      // to be tested.
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
                          ISD::SETLE);
    } else {
      // Test (X - Low) <=u (High - Low) with a single unsigned compare.
      SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                VT, CmpOp, DAG.getConstant(Low, VT));
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
                          DAG.getConstant(High-Low, VT), ISD::SETULE);
    }
  }

  // Update successor info
  SwitchBB->addSuccessor(CB.TrueBB);
  SwitchBB->addSuccessor(CB.FalseBB);

  // Set NextBlock to be the MBB immediately after the current one, if any.
  // This is used to avoid emitting unnecessary branches to the next block.
  MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = SwitchBB;
  if (++BBI != FuncInfo.MF->end())
    NextBlock = BBI;

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.  The inversion is an
  // xor of the condition with 1.
  if (CB.TrueBB == NextBlock) {
    std::swap(CB.TrueBB, CB.FalseBB);
    SDValue True = DAG.getConstant(1, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
  }

  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, getControlRoot(), Cond,
                               DAG.getBasicBlock(CB.TrueBB));

  // Insert the false branch, unless the false block is reached by falling
  // through.
  if (CB.FalseBB != NextBlock)
    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                         DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}
1447
1448 /// visitJumpTable - Emit JumpTable node in the current MBB
1449 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1450   // Emit the code for the jump table
1451   assert(JT.Reg != -1U && "Should lower JT Header first!");
1452   EVT PTy = TLI.getPointerTy();
1453   SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
1454                                      JT.Reg, PTy);
1455   SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1456   SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
1457                                     MVT::Other, Index.getValue(1),
1458                                     Table, Index);
1459   DAG.setRoot(BrJumpTable);
1460 }
1461
1462 /// visitJumpTableHeader - This function emits necessary code to produce index
1463 /// in the JumpTable from switch case.
1464 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
1465                                                JumpTableHeader &JTH,
1466                                                MachineBasicBlock *SwitchBB) {
1467   // Subtract the lowest switch case value from the value being switched on and
1468   // conditional branch to default mbb if the result is greater than the
1469   // difference between smallest and largest cases.
1470   SDValue SwitchOp = getValue(JTH.SValue);
1471   EVT VT = SwitchOp.getValueType();
1472   SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1473                             DAG.getConstant(JTH.First, VT));
1474
1475   // The SDNode we just created, which holds the value being switched on minus
1476   // the smallest case value, needs to be copied to a virtual register so it
1477   // can be used as an index into the jump table in a subsequent basic block.
1478   // This value may be smaller or larger than the target's pointer type, and
1479   // therefore require extension or truncating.
1480   SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
1481
1482   unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
1483   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1484                                     JumpTableReg, SwitchOp);
1485   JT.Reg = JumpTableReg;
1486
1487   // Emit the range check for the jump table, and branch to the default block
1488   // for the switch statement if the value being switched on exceeds the largest
1489   // case in the switch.
1490   SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
1491                              TLI.getSetCCResultType(Sub.getValueType()), Sub,
1492                              DAG.getConstant(JTH.Last-JTH.First,VT),
1493                              ISD::SETUGT);
1494
1495   // Set NextBlock to be the MBB immediately after the current one, if any.
1496   // This is used to avoid emitting unnecessary branches to the next block.
1497   MachineBasicBlock *NextBlock = 0;
1498   MachineFunction::iterator BBI = SwitchBB;
1499
1500   if (++BBI != FuncInfo.MF->end())
1501     NextBlock = BBI;
1502
1503   SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1504                                MVT::Other, CopyTo, CMP,
1505                                DAG.getBasicBlock(JT.Default));
1506
1507   if (JT.MBB != NextBlock)
1508     BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
1509                          DAG.getBasicBlock(JT.MBB));
1510
1511   DAG.setRoot(BrCond);
1512 }
1513
1514 /// visitBitTestHeader - This function emits necessary code to produce value
1515 /// suitable for "bit tests"
1516 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
1517                                              MachineBasicBlock *SwitchBB) {
1518   // Subtract the minimum value
1519   SDValue SwitchOp = getValue(B.SValue);
1520   EVT VT = SwitchOp.getValueType();
1521   SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1522                             DAG.getConstant(B.First, VT));
1523
1524   // Check range
1525   SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
1526                                   TLI.getSetCCResultType(Sub.getValueType()),
1527                                   Sub, DAG.getConstant(B.Range, VT),
1528                                   ISD::SETUGT);
1529
1530   SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
1531                                        TLI.getPointerTy());
1532
1533   B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
1534   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1535                                     B.Reg, ShiftOp);
1536
1537   // Set NextBlock to be the MBB immediately after the current one, if any.
1538   // This is used to avoid emitting unnecessary branches to the next block.
1539   MachineBasicBlock *NextBlock = 0;
1540   MachineFunction::iterator BBI = SwitchBB;
1541   if (++BBI != FuncInfo.MF->end())
1542     NextBlock = BBI;
1543
1544   MachineBasicBlock* MBB = B.Cases[0].ThisBB;
1545
1546   SwitchBB->addSuccessor(B.Default);
1547   SwitchBB->addSuccessor(MBB);
1548
1549   SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1550                                 MVT::Other, CopyTo, RangeCmp,
1551                                 DAG.getBasicBlock(B.Default));
1552
1553   if (MBB != NextBlock)
1554     BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
1555                           DAG.getBasicBlock(MBB));
1556
1557   DAG.setRoot(BrRange);
1558 }
1559
1560 /// visitBitTestCase - this function produces one "bit test"
1561 void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
1562                                            unsigned Reg,
1563                                            BitTestCase &B,
1564                                            MachineBasicBlock *SwitchBB) {
1565   SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
1566                                        TLI.getPointerTy());
1567   SDValue Cmp;
1568   if (CountPopulation_64(B.Mask) == 1) {
1569     // Testing for a single bit; just compare the shift count with what it
1570     // would need to be to shift a 1 bit in that position.
1571     Cmp = DAG.getSetCC(getCurDebugLoc(),
1572                        TLI.getSetCCResultType(ShiftOp.getValueType()),
1573                        ShiftOp,
1574                        DAG.getConstant(CountTrailingZeros_64(B.Mask),
1575                                        TLI.getPointerTy()),
1576                        ISD::SETEQ);
1577   } else {
1578     // Make desired shift
1579     SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
1580                                     TLI.getPointerTy(),
1581                                     DAG.getConstant(1, TLI.getPointerTy()),
1582                                     ShiftOp);
1583
1584     // Emit bit tests and jumps
1585     SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
1586                                 TLI.getPointerTy(), SwitchVal,
1587                                 DAG.getConstant(B.Mask, TLI.getPointerTy()));
1588     Cmp = DAG.getSetCC(getCurDebugLoc(),
1589                        TLI.getSetCCResultType(AndOp.getValueType()),
1590                        AndOp, DAG.getConstant(0, TLI.getPointerTy()),
1591                        ISD::SETNE);
1592   }
1593
1594   SwitchBB->addSuccessor(B.TargetBB);
1595   SwitchBB->addSuccessor(NextMBB);
1596
1597   SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1598                               MVT::Other, getControlRoot(),
1599                               Cmp, DAG.getBasicBlock(B.TargetBB));
1600
1601   // Set NextBlock to be the MBB immediately after the current one, if any.
1602   // This is used to avoid emitting unnecessary branches to the next block.
1603   MachineBasicBlock *NextBlock = 0;
1604   MachineFunction::iterator BBI = SwitchBB;
1605   if (++BBI != FuncInfo.MF->end())
1606     NextBlock = BBI;
1607
1608   if (NextMBB != NextBlock)
1609     BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
1610                         DAG.getBasicBlock(NextMBB));
1611
1612   DAG.setRoot(BrAnd);
1613 }
1614
1615 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
1616   MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
1617
1618   // Retrieve successors.
1619   MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
1620   MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
1621
1622   const Value *Callee(I.getCalledValue());
1623   if (isa<InlineAsm>(Callee))
1624     visitInlineAsm(&I);
1625   else
1626     LowerCallTo(&I, getValue(Callee), false, LandingPad);
1627
1628   // If the value of the invoke is used outside of its defining block, make it
1629   // available as a virtual register.
1630   CopyToExportRegsIfNeeded(&I);
1631
1632   // Update successor info
1633   InvokeMBB->addSuccessor(Return);
1634   InvokeMBB->addSuccessor(LandingPad);
1635
1636   // Drop into normal successor.
1637   DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1638                           MVT::Other, getControlRoot(),
1639                           DAG.getBasicBlock(Return)));
1640 }
1641
1642 void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
1643 }
1644
1645 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
1646 /// small case ranges).
1647 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
1648                                                  CaseRecVector& WorkList,
1649                                                  const Value* SV,
1650                                                  MachineBasicBlock *Default,
1651                                                  MachineBasicBlock *SwitchBB) {
1652   Case& BackCase  = *(CR.Range.second-1);
1653
1654   // Size is the number of Cases represented by this range.
1655   size_t Size = CR.Range.second - CR.Range.first;
1656   if (Size > 3)
1657     return false;
1658
1659   // Get the MachineFunction which holds the current MBB.  This is used when
1660   // inserting any additional MBBs necessary to represent the switch.
1661   MachineFunction *CurMF = FuncInfo.MF;
1662
1663   // Figure out which block is immediately after the current one.
1664   MachineBasicBlock *NextBlock = 0;
1665   MachineFunction::iterator BBI = CR.CaseBB;
1666
1667   if (++BBI != FuncInfo.MF->end())
1668     NextBlock = BBI;
1669
1670   // TODO: If any two of the cases has the same destination, and if one value
1671   // is the same as the other, but has one bit unset that the other has set,
1672   // use bit manipulation to do two compares at once.  For example:
1673   // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
1674
1675   // Rearrange the case blocks so that the last one falls through if possible.
1676   if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
1677     // The last case block won't fall through into 'NextBlock' if we emit the
1678     // branches in this order.  See if rearranging a case value would help.
1679     for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
1680       if (I->BB == NextBlock) {
1681         std::swap(*I, BackCase);
1682         break;
1683       }
1684     }
1685   }
1686
1687   // Create a CaseBlock record representing a conditional branch to
1688   // the Case's target mbb if the value being switched on SV is equal
1689   // to C.
1690   MachineBasicBlock *CurBlock = CR.CaseBB;
1691   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
1692     MachineBasicBlock *FallThrough;
1693     if (I != E-1) {
1694       FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
1695       CurMF->insert(BBI, FallThrough);
1696
1697       // Put SV in a virtual register to make it available from the new blocks.
1698       ExportFromCurrentBlock(SV);
1699     } else {
1700       // If the last case doesn't match, go to the default block.
1701       FallThrough = Default;
1702     }
1703
1704     const Value *RHS, *LHS, *MHS;
1705     ISD::CondCode CC;
1706     if (I->High == I->Low) {
1707       // This is just small small case range :) containing exactly 1 case
1708       CC = ISD::SETEQ;
1709       LHS = SV; RHS = I->High; MHS = NULL;
1710     } else {
1711       CC = ISD::SETLE;
1712       LHS = I->Low; MHS = SV; RHS = I->High;
1713     }
1714     CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
1715
1716     // If emitting the first comparison, just call visitSwitchCase to emit the
1717     // code into the current block.  Otherwise, push the CaseBlock onto the
1718     // vector to be later processed by SDISel, and insert the node's MBB
1719     // before the next MBB.
1720     if (CurBlock == SwitchBB)
1721       visitSwitchCase(CB, SwitchBB);
1722     else
1723       SwitchCases.push_back(CB);
1724
1725     CurBlock = FallThrough;
1726   }
1727
1728   return true;
1729 }
1730
1731 static inline bool areJTsAllowed(const TargetLowering &TLI) {
1732   return !DisableJumpTables &&
1733           (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1734            TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
1735 }
1736
1737 static APInt ComputeRange(const APInt &First, const APInt &Last) {
1738   APInt LastExt(Last), FirstExt(First);
1739   uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
1740   LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
1741   return (LastExt - FirstExt + 1ULL);
1742 }
1743
1744 /// handleJTSwitchCase - Emit jumptable for current switch case range
1745 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
1746                                              CaseRecVector& WorkList,
1747                                              const Value* SV,
1748                                              MachineBasicBlock* Default,
1749                                              MachineBasicBlock *SwitchBB) {
1750   Case& FrontCase = *CR.Range.first;
1751   Case& BackCase  = *(CR.Range.second-1);
1752
1753   const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1754   const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1755
1756   APInt TSize(First.getBitWidth(), 0);
1757   for (CaseItr I = CR.Range.first, E = CR.Range.second;
1758        I!=E; ++I)
1759     TSize += I->size();
1760
1761   if (!areJTsAllowed(TLI) || TSize.ult(4))
1762     return false;
1763
1764   APInt Range = ComputeRange(First, Last);
1765   double Density = TSize.roundToDouble() / Range.roundToDouble();
1766   if (Density < 0.4)
1767     return false;
1768
1769   DEBUG(dbgs() << "Lowering jump table\n"
1770                << "First entry: " << First << ". Last entry: " << Last << '\n'
1771                << "Range: " << Range
1772                << "Size: " << TSize << ". Density: " << Density << "\n\n");
1773
1774   // Get the MachineFunction which holds the current MBB.  This is used when
1775   // inserting any additional MBBs necessary to represent the switch.
1776   MachineFunction *CurMF = FuncInfo.MF;
1777
1778   // Figure out which block is immediately after the current one.
1779   MachineFunction::iterator BBI = CR.CaseBB;
1780   ++BBI;
1781
1782   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1783
1784   // Create a new basic block to hold the code for loading the address
1785   // of the jump table, and jumping to it.  Update successor information;
1786   // we will either branch to the default case for the switch, or the jump
1787   // table.
1788   MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1789   CurMF->insert(BBI, JumpTableBB);
1790   CR.CaseBB->addSuccessor(Default);
1791   CR.CaseBB->addSuccessor(JumpTableBB);
1792
1793   // Build a vector of destination BBs, corresponding to each target
1794   // of the jump table. If the value of the jump table slot corresponds to
1795   // a case statement, push the case's BB onto the vector, otherwise, push
1796   // the default BB.
1797   std::vector<MachineBasicBlock*> DestBBs;
1798   APInt TEI = First;
1799   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
1800     const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
1801     const APInt &High = cast<ConstantInt>(I->High)->getValue();
1802
1803     if (Low.sle(TEI) && TEI.sle(High)) {
1804       DestBBs.push_back(I->BB);
1805       if (TEI==High)
1806         ++I;
1807     } else {
1808       DestBBs.push_back(Default);
1809     }
1810   }
1811
1812   // Update successor info. Add one edge to each unique successor.
1813   BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
1814   for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
1815          E = DestBBs.end(); I != E; ++I) {
1816     if (!SuccsHandled[(*I)->getNumber()]) {
1817       SuccsHandled[(*I)->getNumber()] = true;
1818       JumpTableBB->addSuccessor(*I);
1819     }
1820   }
1821
1822   // Create a jump table index for this jump table.
1823   unsigned JTEncoding = TLI.getJumpTableEncoding();
1824   unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
1825                        ->createJumpTableIndex(DestBBs);
1826
1827   // Set the jump table information so that we can codegen it as a second
1828   // MachineBasicBlock
1829   JumpTable JT(-1U, JTI, JumpTableBB, Default);
1830   JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
1831   if (CR.CaseBB == SwitchBB)
1832     visitJumpTableHeader(JT, JTH, SwitchBB);
1833
1834   JTCases.push_back(JumpTableBlock(JTH, JT));
1835
1836   return true;
1837 }
1838
1839 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into
1840 /// 2 subtrees.
1841 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
1842                                                   CaseRecVector& WorkList,
1843                                                   const Value* SV,
1844                                                   MachineBasicBlock *Default,
1845                                                   MachineBasicBlock *SwitchBB) {
1846   // Get the MachineFunction which holds the current MBB.  This is used when
1847   // inserting any additional MBBs necessary to represent the switch.
1848   MachineFunction *CurMF = FuncInfo.MF;
1849
1850   // Figure out which block is immediately after the current one.
1851   MachineFunction::iterator BBI = CR.CaseBB;
1852   ++BBI;
1853
1854   Case& FrontCase = *CR.Range.first;
1855   Case& BackCase  = *(CR.Range.second-1);
1856   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1857
1858   // Size is the number of Cases represented by this range.
1859   unsigned Size = CR.Range.second - CR.Range.first;
1860
1861   const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1862   const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1863   double FMetric = 0;
1864   CaseItr Pivot = CR.Range.first + Size/2;
1865
1866   // Select optimal pivot, maximizing sum density of LHS and RHS. This will
1867   // (heuristically) allow us to emit JumpTable's later.
1868   APInt TSize(First.getBitWidth(), 0);
1869   for (CaseItr I = CR.Range.first, E = CR.Range.second;
1870        I!=E; ++I)
1871     TSize += I->size();
1872
1873   APInt LSize = FrontCase.size();
1874   APInt RSize = TSize-LSize;
1875   DEBUG(dbgs() << "Selecting best pivot: \n"
1876                << "First: " << First << ", Last: " << Last <<'\n'
1877                << "LSize: " << LSize << ", RSize: " << RSize << '\n');
1878   for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
1879        J!=E; ++I, ++J) {
1880     const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
1881     const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
1882     APInt Range = ComputeRange(LEnd, RBegin);
1883     assert((Range - 2ULL).isNonNegative() &&
1884            "Invalid case distance");
1885     double LDensity = (double)LSize.roundToDouble() /
1886                            (LEnd - First + 1ULL).roundToDouble();
1887     double RDensity = (double)RSize.roundToDouble() /
1888                            (Last - RBegin + 1ULL).roundToDouble();
1889     double Metric = Range.logBase2()*(LDensity+RDensity);
1890     // Should always split in some non-trivial place
1891     DEBUG(dbgs() <<"=>Step\n"
1892                  << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
1893                  << "LDensity: " << LDensity
1894                  << ", RDensity: " << RDensity << '\n'
1895                  << "Metric: " << Metric << '\n');
1896     if (FMetric < Metric) {
1897       Pivot = J;
1898       FMetric = Metric;
1899       DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
1900     }
1901
1902     LSize += J->size();
1903     RSize -= J->size();
1904   }
1905   if (areJTsAllowed(TLI)) {
1906     // If our case is dense we *really* should handle it earlier!
1907     assert((FMetric > 0) && "Should handle dense range earlier!");
1908   } else {
1909     Pivot = CR.Range.first + Size/2;
1910   }
1911
1912   CaseRange LHSR(CR.Range.first, Pivot);
1913   CaseRange RHSR(Pivot, CR.Range.second);
1914   Constant *C = Pivot->Low;
1915   MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
1916
1917   // We know that we branch to the LHS if the Value being switched on is
1918   // less than the Pivot value, C.  We use this to optimize our binary
1919   // tree a bit, by recognizing that if SV is greater than or equal to the
1920   // LHS's Case Value, and that Case Value is exactly one less than the
1921   // Pivot's Value, then we can branch directly to the LHS's Target,
1922   // rather than creating a leaf node for it.
1923   if ((LHSR.second - LHSR.first) == 1 &&
1924       LHSR.first->High == CR.GE &&
1925       cast<ConstantInt>(C)->getValue() ==
1926       (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
1927     TrueBB = LHSR.first->BB;
1928   } else {
1929     TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1930     CurMF->insert(BBI, TrueBB);
1931     WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
1932
1933     // Put SV in a virtual register to make it available from the new blocks.
1934     ExportFromCurrentBlock(SV);
1935   }
1936
1937   // Similar to the optimization above, if the Value being switched on is
1938   // known to be less than the Constant CR.LT, and the current Case Value
1939   // is CR.LT - 1, then we can branch directly to the target block for
1940   // the current Case Value, rather than emitting a RHS leaf node for it.
1941   if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
1942       cast<ConstantInt>(RHSR.first->Low)->getValue() ==
1943       (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
1944     FalseBB = RHSR.first->BB;
1945   } else {
1946     FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1947     CurMF->insert(BBI, FalseBB);
1948     WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
1949
1950     // Put SV in a virtual register to make it available from the new blocks.
1951     ExportFromCurrentBlock(SV);
1952   }
1953
1954   // Create a CaseBlock record representing a conditional branch to
1955   // the LHS node if the value being switched on SV is less than C.
1956   // Otherwise, branch to LHS.
1957   CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
1958
1959   if (CR.CaseBB == SwitchBB)
1960     visitSwitchCase(CB, SwitchBB);
1961   else
1962     SwitchCases.push_back(CB);
1963
1964   return true;
1965 }
1966
1967 /// handleBitTestsSwitchCase - if current case range has few destination and
1968 /// range span less, than machine word bitwidth, encode case range into series
1969 /// of masks and emit bit tests with these masks.
1970 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
1971                                                    CaseRecVector& WorkList,
1972                                                    const Value* SV,
1973                                                    MachineBasicBlock* Default,
1974                                                    MachineBasicBlock *SwitchBB){
1975   EVT PTy = TLI.getPointerTy();
1976   unsigned IntPtrBits = PTy.getSizeInBits();
1977
1978   Case& FrontCase = *CR.Range.first;
1979   Case& BackCase  = *(CR.Range.second-1);
1980
1981   // Get the MachineFunction which holds the current MBB.  This is used when
1982   // inserting any additional MBBs necessary to represent the switch.
1983   MachineFunction *CurMF = FuncInfo.MF;
1984
1985   // If target does not have legal shift left, do not emit bit tests at all.
1986   if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
1987     return false;
1988
1989   size_t numCmps = 0;
1990   for (CaseItr I = CR.Range.first, E = CR.Range.second;
1991        I!=E; ++I) {
1992     // Single case counts one, case range - two.
1993     numCmps += (I->Low == I->High ? 1 : 2);
1994   }
1995
1996   // Count unique destinations
1997   SmallSet<MachineBasicBlock*, 4> Dests;
1998   for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
1999     Dests.insert(I->BB);
2000     if (Dests.size() > 3)
2001       // Don't bother the code below, if there are too much unique destinations
2002       return false;
2003   }
2004   DEBUG(dbgs() << "Total number of unique destinations: "
2005         << Dests.size() << '\n'
2006         << "Total number of comparisons: " << numCmps << '\n');
2007
2008   // Compute span of values.
2009   const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
2010   const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
2011   APInt cmpRange = maxValue - minValue;
2012
2013   DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
2014                << "Low bound: " << minValue << '\n'
2015                << "High bound: " << maxValue << '\n');
2016
2017   if (cmpRange.uge(IntPtrBits) ||
2018       (!(Dests.size() == 1 && numCmps >= 3) &&
2019        !(Dests.size() == 2 && numCmps >= 5) &&
2020        !(Dests.size() >= 3 && numCmps >= 6)))
2021     return false;
2022
2023   DEBUG(dbgs() << "Emitting bit tests\n");
2024   APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
2025
2026   // Optimize the case where all the case values fit in a
2027   // word without having to subtract minValue. In this case,
2028   // we can optimize away the subtraction.
2029   if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
2030     cmpRange = maxValue;
2031   } else {
2032     lowBound = minValue;
2033   }
2034
2035   CaseBitsVector CasesBits;
2036   unsigned i, count = 0;
2037
2038   for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2039     MachineBasicBlock* Dest = I->BB;
2040     for (i = 0; i < count; ++i)
2041       if (Dest == CasesBits[i].BB)
2042         break;
2043
2044     if (i == count) {
2045       assert((count < 3) && "Too much destinations to test!");
2046       CasesBits.push_back(CaseBits(0, Dest, 0));
2047       count++;
2048     }
2049
2050     const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
2051     const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
2052
2053     uint64_t lo = (lowValue - lowBound).getZExtValue();
2054     uint64_t hi = (highValue - lowBound).getZExtValue();
2055
2056     for (uint64_t j = lo; j <= hi; j++) {
2057       CasesBits[i].Mask |=  1ULL << j;
2058       CasesBits[i].Bits++;
2059     }
2060
2061   }
2062   std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
2063
2064   BitTestInfo BTC;
2065
2066   // Figure out which block is immediately after the current one.
2067   MachineFunction::iterator BBI = CR.CaseBB;
2068   ++BBI;
2069
2070   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2071
2072   DEBUG(dbgs() << "Cases:\n");
2073   for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
2074     DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
2075                  << ", Bits: " << CasesBits[i].Bits
2076                  << ", BB: " << CasesBits[i].BB << '\n');
2077
2078     MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2079     CurMF->insert(BBI, CaseBB);
2080     BTC.push_back(BitTestCase(CasesBits[i].Mask,
2081                               CaseBB,
2082                               CasesBits[i].BB));
2083
2084     // Put SV in a virtual register to make it available from the new blocks.
2085     ExportFromCurrentBlock(SV);
2086   }
2087
2088   BitTestBlock BTB(lowBound, cmpRange, SV,
2089                    -1U, (CR.CaseBB == SwitchBB),
2090                    CR.CaseBB, Default, BTC);
2091
2092   if (CR.CaseBB == SwitchBB)
2093     visitBitTestHeader(BTB, SwitchBB);
2094
2095   BitTestCases.push_back(BTB);
2096
2097   return true;
2098 }
2099
2100 /// Clusterify - Transform simple list of Cases into list of CaseRange's
2101 size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
2102                                        const SwitchInst& SI) {
2103   size_t numCmps = 0;
2104
2105   // Start with "simple" cases
2106   for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
2107     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
2108     Cases.push_back(Case(SI.getSuccessorValue(i),
2109                          SI.getSuccessorValue(i),
2110                          SMBB));
2111   }
2112   std::sort(Cases.begin(), Cases.end(), CaseCmp());
2113
2114   // Merge case into clusters
2115   if (Cases.size() >= 2)
2116     // Must recompute end() each iteration because it may be
2117     // invalidated by erase if we hold on to it
2118     for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
2119       const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
2120       const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
2121       MachineBasicBlock* nextBB = J->BB;
2122       MachineBasicBlock* currentBB = I->BB;
2123
2124       // If the two neighboring cases go to the same destination, merge them
2125       // into a single case.
2126       if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
2127         I->High = J->High;
2128         J = Cases.erase(J);
2129       } else {
2130         I = J++;
2131       }
2132     }
2133
2134   for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
2135     if (I->Low != I->High)
2136       // A range counts double, since it requires two compares.
2137       ++numCmps;
2138   }
2139
2140   return numCmps;
2141 }
2142
2143 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
2144   MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
2145
2146   // Figure out which block is immediately after the current one.
2147   MachineBasicBlock *NextBlock = 0;
2148   MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2149
2150   // If there is only the default destination, branch to it if it is not the
2151   // next basic block.  Otherwise, just fall through.
2152   if (SI.getNumOperands() == 2) {
2153     // Update machine-CFG edges.
2154
2155     // If this is not a fall-through branch, emit the branch.
2156     SwitchMBB->addSuccessor(Default);
2157     if (Default != NextBlock)
2158       DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
2159                               MVT::Other, getControlRoot(),
2160                               DAG.getBasicBlock(Default)));
2161
2162     return;
2163   }
2164
2165   // If there are any non-default case statements, create a vector of Cases
2166   // representing each one, and sort the vector so that we can efficiently
2167   // create a binary search tree from them.
2168   CaseVector Cases;
2169   size_t numCmps = Clusterify(Cases, SI);
2170   DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
2171                << ". Total compares: " << numCmps << '\n');
2172   numCmps = 0;
2173
2174   // Get the Value to be switched on and default basic blocks, which will be
2175   // inserted into CaseBlock records, representing basic blocks in the binary
2176   // search tree.
2177   const Value *SV = SI.getOperand(0);
2178
2179   // Push the initial CaseRec onto the worklist
2180   CaseRecVector WorkList;
2181   WorkList.push_back(CaseRec(SwitchMBB,0,0,
2182                              CaseRange(Cases.begin(),Cases.end())));
2183
2184   while (!WorkList.empty()) {
2185     // Grab a record representing a case range to process off the worklist
2186     CaseRec CR = WorkList.back();
2187     WorkList.pop_back();
2188
2189     if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2190       continue;
2191
2192     // If the range has few cases (two or less) emit a series of specific
2193     // tests.
2194     if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
2195       continue;
2196
2197     // If the switch has more than 5 blocks, and at least 40% dense, and the
2198     // target supports indirect branches, then emit a jump table rather than
2199     // lowering the switch to a binary tree of conditional branches.
2200     if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2201       continue;
2202
2203     // Emit binary tree. We need to pick a pivot, and push left and right ranges
2204     // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
2205     handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
2206   }
2207 }
2208
2209 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2210   MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2211
2212   // Update machine-CFG edges with unique successors.
2213   SmallVector<BasicBlock*, 32> succs;
2214   succs.reserve(I.getNumSuccessors());
2215   for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
2216     succs.push_back(I.getSuccessor(i));
2217   array_pod_sort(succs.begin(), succs.end());
2218   succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
2219   for (unsigned i = 0, e = succs.size(); i != e; ++i)
2220     IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
2221
2222   DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
2223                           MVT::Other, getControlRoot(),
2224                           getValue(I.getAddress())));
2225 }
2226
2227 void SelectionDAGBuilder::visitFSub(const User &I) {
2228   // -0.0 - X --> fneg
2229   const Type *Ty = I.getType();
2230   if (Ty->isVectorTy()) {
2231     if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
2232       const VectorType *DestTy = cast<VectorType>(I.getType());
2233       const Type *ElTy = DestTy->getElementType();
2234       unsigned VL = DestTy->getNumElements();
2235       std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
2236       Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
2237       if (CV == CNZ) {
2238         SDValue Op2 = getValue(I.getOperand(1));
2239         setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2240                                  Op2.getValueType(), Op2));
2241         return;
2242       }
2243     }
2244   }
2245
2246   if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
2247     if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
2248       SDValue Op2 = getValue(I.getOperand(1));
2249       setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2250                                Op2.getValueType(), Op2));
2251       return;
2252     }
2253
2254   visitBinary(I, ISD::FSUB);
2255 }
2256
2257 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2258   SDValue Op1 = getValue(I.getOperand(0));
2259   SDValue Op2 = getValue(I.getOperand(1));
2260   setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
2261                            Op1.getValueType(), Op1, Op2));
2262 }
2263
2264 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2265   SDValue Op1 = getValue(I.getOperand(0));
2266   SDValue Op2 = getValue(I.getOperand(1));
2267   if (!I.getType()->isVectorTy() &&
2268       Op2.getValueType() != TLI.getShiftAmountTy()) {
2269     // If the operand is smaller than the shift count type, promote it.
2270     EVT PTy = TLI.getPointerTy();
2271     EVT STy = TLI.getShiftAmountTy();
2272     if (STy.bitsGT(Op2.getValueType()))
2273       Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2274                         TLI.getShiftAmountTy(), Op2);
2275     // If the operand is larger than the shift count type but the shift
2276     // count type has enough bits to represent any shift value, truncate
2277     // it now. This is a common case and it exposes the truncate to
2278     // optimization early.
2279     else if (STy.getSizeInBits() >=
2280              Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
2281       Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2282                         TLI.getShiftAmountTy(), Op2);
2283     // Otherwise we'll need to temporarily settle for some other
2284     // convenient type; type legalization will make adjustments as
2285     // needed.
2286     else if (PTy.bitsLT(Op2.getValueType()))
2287       Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2288                         TLI.getPointerTy(), Op2);
2289     else if (PTy.bitsGT(Op2.getValueType()))
2290       Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2291                         TLI.getPointerTy(), Op2);
2292   }
2293
2294   setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
2295                            Op1.getValueType(), Op1, Op2));
2296 }
2297
2298 void SelectionDAGBuilder::visitICmp(const User &I) {
2299   ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2300   if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2301     predicate = IC->getPredicate();
2302   else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2303     predicate = ICmpInst::Predicate(IC->getPredicate());
2304   SDValue Op1 = getValue(I.getOperand(0));
2305   SDValue Op2 = getValue(I.getOperand(1));
2306   ISD::CondCode Opcode = getICmpCondCode(predicate);
2307
2308   EVT DestVT = TLI.getValueType(I.getType());
2309   setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
2310 }
2311
2312 void SelectionDAGBuilder::visitFCmp(const User &I) {
2313   FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2314   if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2315     predicate = FC->getPredicate();
2316   else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2317     predicate = FCmpInst::Predicate(FC->getPredicate());
2318   SDValue Op1 = getValue(I.getOperand(0));
2319   SDValue Op2 = getValue(I.getOperand(1));
2320   ISD::CondCode Condition = getFCmpCondCode(predicate);
2321   EVT DestVT = TLI.getValueType(I.getType());
2322   setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
2323 }
2324
2325 void SelectionDAGBuilder::visitSelect(const User &I) {
2326   SmallVector<EVT, 4> ValueVTs;
2327   ComputeValueVTs(TLI, I.getType(), ValueVTs);
2328   unsigned NumValues = ValueVTs.size();
2329   if (NumValues == 0) return;
2330
2331   SmallVector<SDValue, 4> Values(NumValues);
2332   SDValue Cond     = getValue(I.getOperand(0));
2333   SDValue TrueVal  = getValue(I.getOperand(1));
2334   SDValue FalseVal = getValue(I.getOperand(2));
2335
2336   for (unsigned i = 0; i != NumValues; ++i)
2337     Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
2338                           TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
2339                             Cond,
2340                             SDValue(TrueVal.getNode(),
2341                                     TrueVal.getResNo() + i),
2342                             SDValue(FalseVal.getNode(),
2343                                     FalseVal.getResNo() + i));
2344
2345   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2346                            DAG.getVTList(&ValueVTs[0], NumValues),
2347                            &Values[0], NumValues));
2348 }
2349
2350 void SelectionDAGBuilder::visitTrunc(const User &I) {
2351   // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2352   SDValue N = getValue(I.getOperand(0));
2353   EVT DestVT = TLI.getValueType(I.getType());
2354   setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
2355 }
2356
2357 void SelectionDAGBuilder::visitZExt(const User &I) {
2358   // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2359   // ZExt also can't be a cast to bool for same reason. So, nothing much to do
2360   SDValue N = getValue(I.getOperand(0));
2361   EVT DestVT = TLI.getValueType(I.getType());
2362   setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
2363 }
2364
2365 void SelectionDAGBuilder::visitSExt(const User &I) {
2366   // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2367   // SExt also can't be a cast to bool for same reason. So, nothing much to do
2368   SDValue N = getValue(I.getOperand(0));
2369   EVT DestVT = TLI.getValueType(I.getType());
2370   setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
2371 }
2372
2373 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2374   // FPTrunc is never a no-op cast, no need to check
2375   SDValue N = getValue(I.getOperand(0));
2376   EVT DestVT = TLI.getValueType(I.getType());
2377   setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
2378                            DestVT, N, DAG.getIntPtrConstant(0)));
2379 }
2380
2381 void SelectionDAGBuilder::visitFPExt(const User &I){
2382   // FPTrunc is never a no-op cast, no need to check
2383   SDValue N = getValue(I.getOperand(0));
2384   EVT DestVT = TLI.getValueType(I.getType());
2385   setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
2386 }
2387
2388 void SelectionDAGBuilder::visitFPToUI(const User &I) {
2389   // FPToUI is never a no-op cast, no need to check
2390   SDValue N = getValue(I.getOperand(0));
2391   EVT DestVT = TLI.getValueType(I.getType());
2392   setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
2393 }
2394
2395 void SelectionDAGBuilder::visitFPToSI(const User &I) {
2396   // FPToSI is never a no-op cast, no need to check
2397   SDValue N = getValue(I.getOperand(0));
2398   EVT DestVT = TLI.getValueType(I.getType());
2399   setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
2400 }
2401
2402 void SelectionDAGBuilder::visitUIToFP(const User &I) {
2403   // UIToFP is never a no-op cast, no need to check
2404   SDValue N = getValue(I.getOperand(0));
2405   EVT DestVT = TLI.getValueType(I.getType());
2406   setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
2407 }
2408
2409 void SelectionDAGBuilder::visitSIToFP(const User &I){
2410   // SIToFP is never a no-op cast, no need to check
2411   SDValue N = getValue(I.getOperand(0));
2412   EVT DestVT = TLI.getValueType(I.getType());
2413   setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
2414 }
2415
2416 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
2417   // What to do depends on the size of the integer and the size of the pointer.
2418   // We can either truncate, zero extend, or no-op, accordingly.
2419   SDValue N = getValue(I.getOperand(0));
2420   EVT DestVT = TLI.getValueType(I.getType());
2421   setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2422 }
2423
2424 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
2425   // What to do depends on the size of the integer and the size of the pointer.
2426   // We can either truncate, zero extend, or no-op, accordingly.
2427   SDValue N = getValue(I.getOperand(0));
2428   EVT DestVT = TLI.getValueType(I.getType());
2429   setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2430 }
2431
2432 void SelectionDAGBuilder::visitBitCast(const User &I) {
2433   SDValue N = getValue(I.getOperand(0));
2434   EVT DestVT = TLI.getValueType(I.getType());
2435
2436   // BitCast assures us that source and destination are the same size so this is
2437   // either a BIT_CONVERT or a no-op.
2438   if (DestVT != N.getValueType())
2439     setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
2440                              DestVT, N)); // convert types.
2441   else
2442     setValue(&I, N);            // noop cast.
2443 }
2444
2445 void SelectionDAGBuilder::visitInsertElement(const User &I) {
2446   SDValue InVec = getValue(I.getOperand(0));
2447   SDValue InVal = getValue(I.getOperand(1));
2448   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2449                               TLI.getPointerTy(),
2450                               getValue(I.getOperand(2)));
2451   setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
2452                            TLI.getValueType(I.getType()),
2453                            InVec, InVal, InIdx));
2454 }
2455
2456 void SelectionDAGBuilder::visitExtractElement(const User &I) {
2457   SDValue InVec = getValue(I.getOperand(0));
2458   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2459                               TLI.getPointerTy(),
2460                               getValue(I.getOperand(1)));
2461   setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2462                            TLI.getValueType(I.getType()), InVec, InIdx));
2463 }
2464
2465 // Utility for visitShuffleVector - Returns true if the mask is mask starting
2466 // from SIndx and increasing to the element length (undefs are allowed).
2467 static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
2468   unsigned MaskNumElts = Mask.size();
2469   for (unsigned i = 0; i != MaskNumElts; ++i)
2470     if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
2471       return false;
2472   return true;
2473 }
2474
2475 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
2476   SmallVector<int, 8> Mask;
2477   SDValue Src1 = getValue(I.getOperand(0));
2478   SDValue Src2 = getValue(I.getOperand(1));
2479
2480   // Convert the ConstantVector mask operand into an array of ints, with -1
2481   // representing undef values.
2482   SmallVector<Constant*, 8> MaskElts;
2483   cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
2484   unsigned MaskNumElts = MaskElts.size();
2485   for (unsigned i = 0; i != MaskNumElts; ++i) {
2486     if (isa<UndefValue>(MaskElts[i]))
2487       Mask.push_back(-1);
2488     else
2489       Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
2490   }
2491
2492   EVT VT = TLI.getValueType(I.getType());
2493   EVT SrcVT = Src1.getValueType();
2494   unsigned SrcNumElts = SrcVT.getVectorNumElements();
2495
2496   if (SrcNumElts == MaskNumElts) {
2497     setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2498                                       &Mask[0]));
2499     return;
2500   }
2501
2502   // Normalize the shuffle vector since mask and vector length don't match.
2503   if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
2504     // Mask is longer than the source vectors and is a multiple of the source
2505     // vectors.  We can use concatenate vector to make the mask and vectors
2506     // lengths match.
2507     if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
2508       // The shuffle is concatenating two vectors together.
2509       setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
2510                                VT, Src1, Src2));
2511       return;
2512     }
2513
2514     // Pad both vectors with undefs to make them the same length as the mask.
2515     unsigned NumConcat = MaskNumElts / SrcNumElts;
2516     bool Src1U = Src1.getOpcode() == ISD::UNDEF;
2517     bool Src2U = Src2.getOpcode() == ISD::UNDEF;
2518     SDValue UndefVal = DAG.getUNDEF(SrcVT);
2519
2520     SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
2521     SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
2522     MOps1[0] = Src1;
2523     MOps2[0] = Src2;
2524
2525     Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2526                                                   getCurDebugLoc(), VT,
2527                                                   &MOps1[0], NumConcat);
2528     Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2529                                                   getCurDebugLoc(), VT,
2530                                                   &MOps2[0], NumConcat);
2531
2532     // Readjust mask for new input vector length.
2533     SmallVector<int, 8> MappedOps;
2534     for (unsigned i = 0; i != MaskNumElts; ++i) {
2535       int Idx = Mask[i];
2536       if (Idx < (int)SrcNumElts)
2537         MappedOps.push_back(Idx);
2538       else
2539         MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
2540     }
2541
2542     setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2543                                       &MappedOps[0]));
2544     return;
2545   }
2546
2547   if (SrcNumElts > MaskNumElts) {
2548     // Analyze the access pattern of the vector to see if we can extract
2549     // two subvectors and do the shuffle. The analysis is done by calculating
2550     // the range of elements the mask access on both vectors.
2551     int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
2552     int MaxRange[2] = {-1, -1};
2553
2554     for (unsigned i = 0; i != MaskNumElts; ++i) {
2555       int Idx = Mask[i];
2556       int Input = 0;
2557       if (Idx < 0)
2558         continue;
2559
2560       if (Idx >= (int)SrcNumElts) {
2561         Input = 1;
2562         Idx -= SrcNumElts;
2563       }
2564       if (Idx > MaxRange[Input])
2565         MaxRange[Input] = Idx;
2566       if (Idx < MinRange[Input])
2567         MinRange[Input] = Idx;
2568     }
2569
2570     // Check if the access is smaller than the vector size and can we find
2571     // a reasonable extract index.
2572     int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not
2573                                  // Extract.
2574     int StartIdx[2];  // StartIdx to extract from
2575     for (int Input=0; Input < 2; ++Input) {
2576       if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
2577         RangeUse[Input] = 0; // Unused
2578         StartIdx[Input] = 0;
2579       } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
2580         // Fits within range but we should see if we can find a good
2581         // start index that is a multiple of the mask length.
2582         if (MaxRange[Input] < (int)MaskNumElts) {
2583           RangeUse[Input] = 1; // Extract from beginning of the vector
2584           StartIdx[Input] = 0;
2585         } else {
2586           StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2587           if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2588               StartIdx[Input] + MaskNumElts < SrcNumElts)
2589             RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2590         }
2591       }
2592     }
2593
2594     if (RangeUse[0] == 0 && RangeUse[1] == 0) {
2595       setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
2596       return;
2597     }
2598     else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
2599       // Extract appropriate subvector and generate a vector shuffle
2600       for (int Input=0; Input < 2; ++Input) {
2601         SDValue &Src = Input == 0 ? Src1 : Src2;
2602         if (RangeUse[Input] == 0)
2603           Src = DAG.getUNDEF(VT);
2604         else
2605           Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
2606                             Src, DAG.getIntPtrConstant(StartIdx[Input]));
2607       }
2608
2609       // Calculate new mask.
2610       SmallVector<int, 8> MappedOps;
2611       for (unsigned i = 0; i != MaskNumElts; ++i) {
2612         int Idx = Mask[i];
2613         if (Idx < 0)
2614           MappedOps.push_back(Idx);
2615         else if (Idx < (int)SrcNumElts)
2616           MappedOps.push_back(Idx - StartIdx[0]);
2617         else
2618           MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
2619       }
2620
2621       setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2622                                         &MappedOps[0]));
2623       return;
2624     }
2625   }
2626
2627   // We can't use either concat vectors or extract subvectors so fall back to
2628   // replacing the shuffle with extract and build vector.
2629   // to insert and build vector.
2630   EVT EltVT = VT.getVectorElementType();
2631   EVT PtrVT = TLI.getPointerTy();
2632   SmallVector<SDValue,8> Ops;
2633   for (unsigned i = 0; i != MaskNumElts; ++i) {
2634     if (Mask[i] < 0) {
2635       Ops.push_back(DAG.getUNDEF(EltVT));
2636     } else {
2637       int Idx = Mask[i];
2638       SDValue Res;
2639
2640       if (Idx < (int)SrcNumElts)
2641         Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2642                           EltVT, Src1, DAG.getConstant(Idx, PtrVT));
2643       else
2644         Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2645                           EltVT, Src2,
2646                           DAG.getConstant(Idx - SrcNumElts, PtrVT));
2647
2648       Ops.push_back(Res);
2649     }
2650   }
2651
2652   setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
2653                            VT, &Ops[0], Ops.size()));
2654 }
2655
2656 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
2657   const Value *Op0 = I.getOperand(0);
2658   const Value *Op1 = I.getOperand(1);
2659   const Type *AggTy = I.getType();
2660   const Type *ValTy = Op1->getType();
2661   bool IntoUndef = isa<UndefValue>(Op0);
2662   bool FromUndef = isa<UndefValue>(Op1);
2663
2664   unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2665                                             I.idx_begin(), I.idx_end());
2666
2667   SmallVector<EVT, 4> AggValueVTs;
2668   ComputeValueVTs(TLI, AggTy, AggValueVTs);
2669   SmallVector<EVT, 4> ValValueVTs;
2670   ComputeValueVTs(TLI, ValTy, ValValueVTs);
2671
2672   unsigned NumAggValues = AggValueVTs.size();
2673   unsigned NumValValues = ValValueVTs.size();
2674   SmallVector<SDValue, 4> Values(NumAggValues);
2675
2676   SDValue Agg = getValue(Op0);
2677   SDValue Val = getValue(Op1);
2678   unsigned i = 0;
2679   // Copy the beginning value(s) from the original aggregate.
2680   for (; i != LinearIndex; ++i)
2681     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2682                 SDValue(Agg.getNode(), Agg.getResNo() + i);
2683   // Copy values from the inserted value(s).
2684   for (; i != LinearIndex + NumValValues; ++i)
2685     Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2686                 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
2687   // Copy remaining value(s) from the original aggregate.
2688   for (; i != NumAggValues; ++i)
2689     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2690                 SDValue(Agg.getNode(), Agg.getResNo() + i);
2691
2692   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2693                            DAG.getVTList(&AggValueVTs[0], NumAggValues),
2694                            &Values[0], NumAggValues));
2695 }
2696
2697 void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
2698   const Value *Op0 = I.getOperand(0);
2699   const Type *AggTy = Op0->getType();
2700   const Type *ValTy = I.getType();
2701   bool OutOfUndef = isa<UndefValue>(Op0);
2702
2703   unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2704                                             I.idx_begin(), I.idx_end());
2705
2706   SmallVector<EVT, 4> ValValueVTs;
2707   ComputeValueVTs(TLI, ValTy, ValValueVTs);
2708
2709   unsigned NumValValues = ValValueVTs.size();
2710   SmallVector<SDValue, 4> Values(NumValValues);
2711
2712   SDValue Agg = getValue(Op0);
2713   // Copy out the selected value(s).
2714   for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
2715     Values[i - LinearIndex] =
2716       OutOfUndef ?
2717         DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
2718         SDValue(Agg.getNode(), Agg.getResNo() + i);
2719
2720   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2721                            DAG.getVTList(&ValValueVTs[0], NumValValues),
2722                            &Values[0], NumValValues));
2723 }
2724
2725 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
2726   SDValue N = getValue(I.getOperand(0));
2727   const Type *Ty = I.getOperand(0)->getType();
2728
2729   for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
2730        OI != E; ++OI) {
2731     const Value *Idx = *OI;
2732     if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
2733       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
2734       if (Field) {
2735         // N = N + Offset
2736         uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
2737         N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2738                         DAG.getIntPtrConstant(Offset));
2739       }
2740
2741       Ty = StTy->getElementType(Field);
2742     } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
2743       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
2744
2745       // Offset canonically 0 for unions, but type changes
2746       Ty = UnTy->getElementType(Field);
2747     } else {
2748       Ty = cast<SequentialType>(Ty)->getElementType();
2749
2750       // If this is a constant subscript, handle it quickly.
2751       if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
2752         if (CI->isZero()) continue;
2753         uint64_t Offs =
2754             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
2755         SDValue OffsVal;
2756         EVT PTy = TLI.getPointerTy();
2757         unsigned PtrBits = PTy.getSizeInBits();
2758         if (PtrBits < 64)
2759           OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2760                                 TLI.getPointerTy(),
2761                                 DAG.getConstant(Offs, MVT::i64));
2762         else
2763           OffsVal = DAG.getIntPtrConstant(Offs);
2764
2765         N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2766                         OffsVal);
2767         continue;
2768       }
2769
2770       // N = N + Idx * ElementSize;
2771       APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
2772                                 TD->getTypeAllocSize(Ty));
2773       SDValue IdxN = getValue(Idx);
2774
2775       // If the index is smaller or larger than intptr_t, truncate or extend
2776       // it.
2777       IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
2778
2779       // If this is a multiply by a power of two, turn it into a shl
2780       // immediately.  This is a very common case.
2781       if (ElementSize != 1) {
2782         if (ElementSize.isPowerOf2()) {
2783           unsigned Amt = ElementSize.logBase2();
2784           IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
2785                              N.getValueType(), IdxN,
2786                              DAG.getConstant(Amt, TLI.getPointerTy()));
2787         } else {
2788           SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
2789           IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
2790                              N.getValueType(), IdxN, Scale);
2791         }
2792       }
2793
2794       N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2795                       N.getValueType(), N, IdxN);
2796     }
2797   }
2798
2799   setValue(&I, N);
2800 }
2801
2802 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
2803   // If this is a fixed sized alloca in the entry block of the function,
2804   // allocate it statically on the stack.
2805   if (FuncInfo.StaticAllocaMap.count(&I))
2806     return;   // getValue will auto-populate this.
2807
2808   const Type *Ty = I.getAllocatedType();
2809   uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
2810   unsigned Align =
2811     std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
2812              I.getAlignment());
2813
2814   SDValue AllocSize = getValue(I.getArraySize());
2815
2816   EVT IntPtr = TLI.getPointerTy();
2817   if (AllocSize.getValueType() != IntPtr)
2818     AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
2819
2820   AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
2821                           AllocSize,
2822                           DAG.getConstant(TySize, IntPtr));
2823
2824   // Handle alignment.  If the requested alignment is less than or equal to
2825   // the stack alignment, ignore it.  If the size is greater than or equal to
2826   // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
2827   unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
2828   if (Align <= StackAlign)
2829     Align = 0;
2830
2831   // Round the size of the allocation up to the stack alignment size
2832   // by add SA-1 to the size.
2833   AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2834                           AllocSize.getValueType(), AllocSize,
2835                           DAG.getIntPtrConstant(StackAlign-1));
2836
2837   // Mask out the low bits for alignment purposes.
2838   AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
2839                           AllocSize.getValueType(), AllocSize,
2840                           DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
2841
2842   SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
2843   SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
2844   SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
2845                             VTs, Ops, 3);
2846   setValue(&I, DSA);
2847   DAG.setRoot(DSA.getValue(1));
2848
2849   // Inform the Frame Information that we have just allocated a variable-sized
2850   // object.
2851   FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
2852 }
2853
2854 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
2855   const Value *SV = I.getOperand(0);
2856   SDValue Ptr = getValue(SV);
2857
2858   const Type *Ty = I.getType();
2859
2860   bool isVolatile = I.isVolatile();
2861   bool isNonTemporal = I.getMetadata("nontemporal") != 0;
2862   unsigned Alignment = I.getAlignment();
2863
2864   SmallVector<EVT, 4> ValueVTs;
2865   SmallVector<uint64_t, 4> Offsets;
2866   ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
2867   unsigned NumValues = ValueVTs.size();
2868   if (NumValues == 0)
2869     return;
2870
2871   SDValue Root;
2872   bool ConstantMemory = false;
2873   if (I.isVolatile())
2874     // Serialize volatile loads with other side effects.
2875     Root = getRoot();
2876   else if (AA->pointsToConstantMemory(SV)) {
2877     // Do not serialize (non-volatile) loads of constant memory with anything.
2878     Root = DAG.getEntryNode();
2879     ConstantMemory = true;
2880   } else {
2881     // Do not serialize non-volatile loads against each other.
2882     Root = DAG.getRoot();
2883   }
2884
2885   SmallVector<SDValue, 4> Values(NumValues);
2886   SmallVector<SDValue, 4> Chains(NumValues);
2887   EVT PtrVT = Ptr.getValueType();
2888   for (unsigned i = 0; i != NumValues; ++i) {
2889     SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2890                             PtrVT, Ptr,
2891                             DAG.getConstant(Offsets[i], PtrVT));
2892     SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
2893                             A, SV, Offsets[i], isVolatile,
2894                             isNonTemporal, Alignment);
2895
2896     Values[i] = L;
2897     Chains[i] = L.getValue(1);
2898   }
2899
2900   if (!ConstantMemory) {
2901     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2902                                 MVT::Other, &Chains[0], NumValues);
2903     if (isVolatile)
2904       DAG.setRoot(Chain);
2905     else
2906       PendingLoads.push_back(Chain);
2907   }
2908
2909   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2910                            DAG.getVTList(&ValueVTs[0], NumValues),
2911                            &Values[0], NumValues));
2912 }
2913
2914 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
2915   const Value *SrcV = I.getOperand(0);
2916   const Value *PtrV = I.getOperand(1);
2917
2918   SmallVector<EVT, 4> ValueVTs;
2919   SmallVector<uint64_t, 4> Offsets;
2920   ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
2921   unsigned NumValues = ValueVTs.size();
2922   if (NumValues == 0)
2923     return;
2924
2925   // Get the lowered operands. Note that we do this after
2926   // checking if NumResults is zero, because with zero results
2927   // the operands won't have values in the map.
2928   SDValue Src = getValue(SrcV);
2929   SDValue Ptr = getValue(PtrV);
2930
2931   SDValue Root = getRoot();
2932   SmallVector<SDValue, 4> Chains(NumValues);
2933   EVT PtrVT = Ptr.getValueType();
2934   bool isVolatile = I.isVolatile();
2935   bool isNonTemporal = I.getMetadata("nontemporal") != 0;
2936   unsigned Alignment = I.getAlignment();
2937
2938   for (unsigned i = 0; i != NumValues; ++i) {
2939     SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
2940                               DAG.getConstant(Offsets[i], PtrVT));
2941     Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
2942                              SDValue(Src.getNode(), Src.getResNo() + i),
2943                              Add, PtrV, Offsets[i], isVolatile,
2944                              isNonTemporal, Alignment);
2945   }
2946
2947   DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2948                           MVT::Other, &Chains[0], NumValues));
2949 }
2950
2951 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
2952 /// node.
2953 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
2954                                                unsigned Intrinsic) {
2955   bool HasChain = !I.doesNotAccessMemory();
2956   bool OnlyLoad = HasChain && I.onlyReadsMemory();
2957
2958   // Build the operand list.
2959   SmallVector<SDValue, 8> Ops;
2960   if (HasChain) {  // If this intrinsic has side-effects, chainify it.
2961     if (OnlyLoad) {
2962       // We don't need to serialize loads against other loads.
2963       Ops.push_back(DAG.getRoot());
2964     } else {
2965       Ops.push_back(getRoot());
2966     }
2967   }
2968
2969   // Info is set by getTgtMemInstrinsic
2970   TargetLowering::IntrinsicInfo Info;
2971   bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
2972
2973   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
2974   if (!IsTgtIntrinsic)
2975     Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
2976
2977   // Add all operands of the call to the operand list.
2978   for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
2979     SDValue Op = getValue(I.getArgOperand(i));
2980     assert(TLI.isTypeLegal(Op.getValueType()) &&
2981            "Intrinsic uses a non-legal type?");
2982     Ops.push_back(Op);
2983   }
2984
2985   SmallVector<EVT, 4> ValueVTs;
2986   ComputeValueVTs(TLI, I.getType(), ValueVTs);
2987 #ifndef NDEBUG
2988   for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
2989     assert(TLI.isTypeLegal(ValueVTs[Val]) &&
2990            "Intrinsic uses a non-legal type?");
2991   }
2992 #endif // NDEBUG
2993
2994   if (HasChain)
2995     ValueVTs.push_back(MVT::Other);
2996
2997   SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
2998
2999   // Create the node.
3000   SDValue Result;
3001   if (IsTgtIntrinsic) {
3002     // This is target intrinsic that touches memory
3003     Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
3004                                      VTs, &Ops[0], Ops.size(),
3005                                      Info.memVT, Info.ptrVal, Info.offset,
3006                                      Info.align, Info.vol,
3007                                      Info.readMem, Info.writeMem);
3008   } else if (!HasChain) {
3009     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
3010                          VTs, &Ops[0], Ops.size());
3011   } else if (!I.getType()->isVoidTy()) {
3012     Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
3013                          VTs, &Ops[0], Ops.size());
3014   } else {
3015     Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
3016                          VTs, &Ops[0], Ops.size());
3017   }
3018
3019   if (HasChain) {
3020     SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
3021     if (OnlyLoad)
3022       PendingLoads.push_back(Chain);
3023     else
3024       DAG.setRoot(Chain);
3025   }
3026
3027   if (!I.getType()->isVoidTy()) {
3028     if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
3029       EVT VT = TLI.getValueType(PTy);
3030       Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
3031     }
3032
3033     setValue(&I, Result);
3034   }
3035 }
3036
3037 /// GetSignificand - Get the significand and build it into a floating-point
3038 /// number with exponent of 1:
3039 ///
3040 ///   Op = (Op & 0x007fffff) | 0x3f800000;
3041 ///
3042 /// where Op is the hexidecimal representation of floating point value.
3043 static SDValue
3044 GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
3045   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3046                            DAG.getConstant(0x007fffff, MVT::i32));
3047   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
3048                            DAG.getConstant(0x3f800000, MVT::i32));
3049   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
3050 }
3051
3052 /// GetExponent - Get the exponent:
3053 ///
3054 ///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
3055 ///
3056 /// where Op is the hexidecimal representation of floating point value.
3057 static SDValue
3058 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
3059             DebugLoc dl) {
3060   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3061                            DAG.getConstant(0x7f800000, MVT::i32));
3062   SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
3063                            DAG.getConstant(23, TLI.getPointerTy()));
3064   SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
3065                            DAG.getConstant(127, MVT::i32));
3066   return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
3067 }
3068
3069 /// getF32Constant - Get 32-bit floating point constant.
3070 static SDValue
3071 getF32Constant(SelectionDAG &DAG, unsigned Flt) {
3072   return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
3073 }
3074
3075 /// Inlined utility function to implement binary input atomic intrinsics for
3076 /// visitIntrinsicCall: I is a call instruction
3077 ///                     Op is the associated NodeType for I
3078 const char *
3079 SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
3080                                            ISD::NodeType Op) {
3081   SDValue Root = getRoot();
3082   SDValue L =
3083     DAG.getAtomic(Op, getCurDebugLoc(),
3084                   getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
3085                   Root,
3086                   getValue(I.getArgOperand(0)),
3087                   getValue(I.getArgOperand(1)),
3088                   I.getArgOperand(0));
3089   setValue(&I, L);
3090   DAG.setRoot(L.getValue(1));
3091   return 0;
3092 }
3093
3094 // implVisitAluOverflow - Lower arithmetic overflow instrinsics.
3095 const char *
3096 SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
3097   SDValue Op1 = getValue(I.getArgOperand(0));
3098   SDValue Op2 = getValue(I.getArgOperand(1));
3099
3100   SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
3101   setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
3102   return 0;
3103 }
3104
3105 /// visitExp - Lower an exp intrinsic. Handles the special sequences for
3106 /// limited-precision mode.
3107 void
3108 SelectionDAGBuilder::visitExp(const CallInst &I) {
3109   SDValue result;
3110   DebugLoc dl = getCurDebugLoc();
3111
3112   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3113       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3114     SDValue Op = getValue(I.getArgOperand(0));
3115
3116     // Put the exponent in the right bit position for later addition to the
3117     // final result:
3118     //
3119     //   #define LOG2OFe 1.4426950f
3120     //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
3121     SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3122                              getF32Constant(DAG, 0x3fb8aa3b));
3123     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3124
3125     //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
3126     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3127     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3128
3129     //   IntegerPartOfX <<= 23;
3130     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3131                                  DAG.getConstant(23, TLI.getPointerTy()));
3132
3133     if (LimitFloatPrecision <= 6) {
3134       // For floating-point precision of 6:
3135       //
3136       //   TwoToFractionalPartOfX =
3137       //     0.997535578f +
3138       //       (0.735607626f + 0.252464424f * x) * x;
3139       //
3140       // error 0.0144103317, which is 6 bits
3141       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3142                                getF32Constant(DAG, 0x3e814304));
3143       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3144                                getF32Constant(DAG, 0x3f3c50c8));
3145       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3146       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3147                                getF32Constant(DAG, 0x3f7f5e7e));
3148       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
3149
3150       // Add the exponent into the result in integer domain.
3151       SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3152                                TwoToFracPartOfX, IntegerPartOfX);
3153
3154       result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
3155     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3156       // For floating-point precision of 12:
3157       //
3158       //   TwoToFractionalPartOfX =
3159       //     0.999892986f +
3160       //       (0.696457318f +
3161       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3162       //
3163       // 0.000107046256 error, which is 13 to 14 bits
3164       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3165                                getF32Constant(DAG, 0x3da235e3));
3166       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3167                                getF32Constant(DAG, 0x3e65b8f3));
3168       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3169       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3170                                getF32Constant(DAG, 0x3f324b07));
3171       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3172       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3173                                getF32Constant(DAG, 0x3f7ff8fd));
3174       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
3175
3176       // Add the exponent into the result in integer domain.
3177       SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3178                                TwoToFracPartOfX, IntegerPartOfX);
3179
3180       result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
3181     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3182       // For floating-point precision of 18:
3183       //
3184       //   TwoToFractionalPartOfX =
3185       //     0.999999982f +
3186       //       (0.693148872f +
3187       //         (0.240227044f +
3188       //           (0.554906021e-1f +
3189       //             (0.961591928e-2f +
3190       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3191       //
3192       // error 2.47208000*10^(-7), which is better than 18 bits
3193       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3194                                getF32Constant(DAG, 0x3924b03e));
3195       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3196                                getF32Constant(DAG, 0x3ab24b87));
3197       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3198       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3199                                getF32Constant(DAG, 0x3c1d8c17));
3200       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3201       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3202                                getF32Constant(DAG, 0x3d634a1d));
3203       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3204       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3205                                getF32Constant(DAG, 0x3e75fe14));
3206       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3207       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3208                                 getF32Constant(DAG, 0x3f317234));
3209       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3210       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3211                                 getF32Constant(DAG, 0x3f800000));
3212       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
3213                                              MVT::i32, t13);
3214
3215       // Add the exponent into the result in integer domain.
3216       SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3217                                 TwoToFracPartOfX, IntegerPartOfX);
3218
3219       result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
3220     }
3221   } else {
3222     // No special expansion.
3223     result = DAG.getNode(ISD::FEXP, dl,
3224                          getValue(I.getArgOperand(0)).getValueType(),
3225                          getValue(I.getArgOperand(0)));
3226   }
3227
3228   setValue(&I, result);
3229 }
3230
3231 /// visitLog - Lower a log intrinsic. Handles the special sequences for
3232 /// limited-precision mode.
3233 void
3234 SelectionDAGBuilder::visitLog(const CallInst &I) {
3235   SDValue result;
3236   DebugLoc dl = getCurDebugLoc();
3237
3238   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3239       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3240     SDValue Op = getValue(I.getArgOperand(0));
3241     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3242
3243     // Scale the exponent by log(2) [0.69314718f].
3244     SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3245     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3246                                         getF32Constant(DAG, 0x3f317218));
3247
3248     // Get the significand and build it into a floating-point number with
3249     // exponent of 1.
3250     SDValue X = GetSignificand(DAG, Op1, dl);
3251
3252     if (LimitFloatPrecision <= 6) {
3253       // For floating-point precision of 6:
3254       //
3255       //   LogofMantissa =
3256       //     -1.1609546f +
3257       //       (1.4034025f - 0.23903021f * x) * x;
3258       //
3259       // error 0.0034276066, which is better than 8 bits
3260       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3261                                getF32Constant(DAG, 0xbe74c456));
3262       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3263                                getF32Constant(DAG, 0x3fb3a2b1));
3264       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3265       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3266                                           getF32Constant(DAG, 0x3f949a29));
3267
3268       result = DAG.getNode(ISD::FADD, dl,
3269                            MVT::f32, LogOfExponent, LogOfMantissa);
3270     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3271       // For floating-point precision of 12:
3272       //
3273       //   LogOfMantissa =
3274       //     -1.7417939f +
3275       //       (2.8212026f +
3276       //         (-1.4699568f +
3277       //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
3278       //
3279       // error 0.000061011436, which is 14 bits
3280       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3281                                getF32Constant(DAG, 0xbd67b6d6));
3282       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3283                                getF32Constant(DAG, 0x3ee4f4b8));
3284       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3285       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3286                                getF32Constant(DAG, 0x3fbc278b));
3287       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3288       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3289                                getF32Constant(DAG, 0x40348e95));
3290       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3291       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3292                                           getF32Constant(DAG, 0x3fdef31a));
3293
3294       result = DAG.getNode(ISD::FADD, dl,
3295                            MVT::f32, LogOfExponent, LogOfMantissa);
3296     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3297       // For floating-point precision of 18:
3298       //
3299       //   LogOfMantissa =
3300       //     -2.1072184f +
3301       //       (4.2372794f +
3302       //         (-3.7029485f +
3303       //           (2.2781945f +
3304       //             (-0.87823314f +
3305       //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
3306       //
3307       // error 0.0000023660568, which is better than 18 bits
3308       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3309                                getF32Constant(DAG, 0xbc91e5ac));
3310       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3311                                getF32Constant(DAG, 0x3e4350aa));
3312       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3313       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3314                                getF32Constant(DAG, 0x3f60d3e3));
3315       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3316       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3317                                getF32Constant(DAG, 0x4011cdf0));
3318       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3319       SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3320                                getF32Constant(DAG, 0x406cfd1c));
3321       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3322       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3323                                getF32Constant(DAG, 0x408797cb));
3324       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3325       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3326                                           getF32Constant(DAG, 0x4006dcab));
3327
3328       result = DAG.getNode(ISD::FADD, dl,
3329                            MVT::f32, LogOfExponent, LogOfMantissa);
3330     }
3331   } else {
3332     // No special expansion.
3333     result = DAG.getNode(ISD::FLOG, dl,
3334                          getValue(I.getArgOperand(0)).getValueType(),
3335                          getValue(I.getArgOperand(0)));
3336   }
3337
3338   setValue(&I, result);
3339 }
3340
3341 /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
3342 /// limited-precision mode.
3343 void
3344 SelectionDAGBuilder::visitLog2(const CallInst &I) {
3345   SDValue result;
3346   DebugLoc dl = getCurDebugLoc();
3347
3348   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3349       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3350     SDValue Op = getValue(I.getArgOperand(0));
3351     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3352
3353     // Get the exponent.
3354     SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
3355
3356     // Get the significand and build it into a floating-point number with
3357     // exponent of 1.
3358     SDValue X = GetSignificand(DAG, Op1, dl);
3359
3360     // Different possible minimax approximations of significand in
3361     // floating-point for various degrees of accuracy over [1,2].
3362     if (LimitFloatPrecision <= 6) {
3363       // For floating-point precision of 6:
3364       //
3365       //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
3366       //
3367       // error 0.0049451742, which is more than 7 bits
3368       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3369                                getF32Constant(DAG, 0xbeb08fe0));
3370       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3371                                getF32Constant(DAG, 0x40019463));
3372       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3373       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3374                                            getF32Constant(DAG, 0x3fd6633d));
3375
3376       result = DAG.getNode(ISD::FADD, dl,
3377                            MVT::f32, LogOfExponent, Log2ofMantissa);
3378     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3379       // For floating-point precision of 12:
3380       //
3381       //   Log2ofMantissa =
3382       //     -2.51285454f +
3383       //       (4.07009056f +
3384       //         (-2.12067489f +
3385       //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
3386       //
3387       // error 0.0000876136000, which is better than 13 bits
3388       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3389                                getF32Constant(DAG, 0xbda7262e));
3390       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3391                                getF32Constant(DAG, 0x3f25280b));
3392       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3393       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3394                                getF32Constant(DAG, 0x4007b923));
3395       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3396       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3397                                getF32Constant(DAG, 0x40823e2f));
3398       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3399       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3400                                            getF32Constant(DAG, 0x4020d29c));
3401
3402       result = DAG.getNode(ISD::FADD, dl,
3403                            MVT::f32, LogOfExponent, Log2ofMantissa);
3404     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3405       // For floating-point precision of 18:
3406       //
3407       //   Log2ofMantissa =
3408       //     -3.0400495f +
3409       //       (6.1129976f +
3410       //         (-5.3420409f +
3411       //           (3.2865683f +
3412       //             (-1.2669343f +
3413       //               (0.27515199f -
3414       //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
3415       //
3416       // error 0.0000018516, which is better than 18 bits
3417       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3418                                getF32Constant(DAG, 0xbcd2769e));
3419       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3420                                getF32Constant(DAG, 0x3e8ce0b9));
3421       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3422       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3423                                getF32Constant(DAG, 0x3fa22ae7));
3424       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3425       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3426                                getF32Constant(DAG, 0x40525723));
3427       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3428       SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3429                                getF32Constant(DAG, 0x40aaf200));
3430       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3431       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3432                                getF32Constant(DAG, 0x40c39dad));
3433       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3434       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3435                                            getF32Constant(DAG, 0x4042902c));
3436
3437       result = DAG.getNode(ISD::FADD, dl,
3438                            MVT::f32, LogOfExponent, Log2ofMantissa);
3439     }
3440   } else {
3441     // No special expansion.
3442     result = DAG.getNode(ISD::FLOG2, dl,
3443                          getValue(I.getArgOperand(0)).getValueType(),
3444                          getValue(I.getArgOperand(0)));
3445   }
3446
3447   setValue(&I, result);
3448 }
3449
3450 /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
3451 /// limited-precision mode.
3452 void
3453 SelectionDAGBuilder::visitLog10(const CallInst &I) {
3454   SDValue result;
3455   DebugLoc dl = getCurDebugLoc();
3456
3457   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3458       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3459     SDValue Op = getValue(I.getArgOperand(0));
3460     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3461
3462     // Scale the exponent by log10(2) [0.30102999f].
3463     SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3464     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3465                                         getF32Constant(DAG, 0x3e9a209a));
3466
3467     // Get the significand and build it into a floating-point number with
3468     // exponent of 1.
3469     SDValue X = GetSignificand(DAG, Op1, dl);
3470
3471     if (LimitFloatPrecision <= 6) {
3472       // For floating-point precision of 6:
3473       //
3474       //   Log10ofMantissa =
3475       //     -0.50419619f +
3476       //       (0.60948995f - 0.10380950f * x) * x;
3477       //
3478       // error 0.0014886165, which is 6 bits
3479       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3480                                getF32Constant(DAG, 0xbdd49a13));
3481       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3482                                getF32Constant(DAG, 0x3f1c0789));
3483       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3484       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3485                                             getF32Constant(DAG, 0x3f011300));
3486
3487       result = DAG.getNode(ISD::FADD, dl,
3488                            MVT::f32, LogOfExponent, Log10ofMantissa);
3489     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3490       // For floating-point precision of 12:
3491       //
3492       //   Log10ofMantissa =
3493       //     -0.64831180f +
3494       //       (0.91751397f +
3495       //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
3496       //
3497       // error 0.00019228036, which is better than 12 bits
3498       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3499                                getF32Constant(DAG, 0x3d431f31));
3500       SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3501                                getF32Constant(DAG, 0x3ea21fb2));
3502       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3503       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3504                                getF32Constant(DAG, 0x3f6ae232));
3505       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3506       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3507                                             getF32Constant(DAG, 0x3f25f7c3));
3508
3509       result = DAG.getNode(ISD::FADD, dl,
3510                            MVT::f32, LogOfExponent, Log10ofMantissa);
3511     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3512       // For floating-point precision of 18:
3513       //
3514       //   Log10ofMantissa =
3515       //     -0.84299375f +
3516       //       (1.5327582f +
3517       //         (-1.0688956f +
3518       //           (0.49102474f +
3519       //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
3520       //
3521       // error 0.0000037995730, which is better than 18 bits
3522       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3523                                getF32Constant(DAG, 0x3c5d51ce));
3524       SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3525                                getF32Constant(DAG, 0x3e00685a));
3526       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3527       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3528                                getF32Constant(DAG, 0x3efb6798));
3529       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3530       SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3531                                getF32Constant(DAG, 0x3f88d192));
3532       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3533       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3534                                getF32Constant(DAG, 0x3fc4316c));
3535       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3536       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
3537                                             getF32Constant(DAG, 0x3f57ce70));
3538
3539       result = DAG.getNode(ISD::FADD, dl,
3540                            MVT::f32, LogOfExponent, Log10ofMantissa);
3541     }
3542   } else {
3543     // No special expansion.
3544     result = DAG.getNode(ISD::FLOG10, dl,
3545                          getValue(I.getArgOperand(0)).getValueType(),
3546                          getValue(I.getArgOperand(0)));
3547   }
3548
3549   setValue(&I, result);
3550 }
3551
3552 /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
3553 /// limited-precision mode.
3554 void
3555 SelectionDAGBuilder::visitExp2(const CallInst &I) {
3556   SDValue result;
3557   DebugLoc dl = getCurDebugLoc();
3558
3559   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3560       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3561     SDValue Op = getValue(I.getArgOperand(0));
3562
3563     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
3564
3565     //   FractionalPartOfX = x - (float)IntegerPartOfX;
3566     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3567     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
3568
3569     //   IntegerPartOfX <<= 23;
3570     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3571                                  DAG.getConstant(23, TLI.getPointerTy()));
3572
3573     if (LimitFloatPrecision <= 6) {
3574       // For floating-point precision of 6:
3575       //
3576       //   TwoToFractionalPartOfX =
3577       //     0.997535578f +
3578       //       (0.735607626f + 0.252464424f * x) * x;
3579       //
3580       // error 0.0144103317, which is 6 bits
3581       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3582                                getF32Constant(DAG, 0x3e814304));
3583       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3584                                getF32Constant(DAG, 0x3f3c50c8));
3585       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3586       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3587                                getF32Constant(DAG, 0x3f7f5e7e));
3588       SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3589       SDValue TwoToFractionalPartOfX =
3590         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3591
3592       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3593                            MVT::f32, TwoToFractionalPartOfX);
3594     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3595       // For floating-point precision of 12:
3596       //
3597       //   TwoToFractionalPartOfX =
3598       //     0.999892986f +
3599       //       (0.696457318f +
3600       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3601       //
3602       // error 0.000107046256, which is 13 to 14 bits
3603       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3604                                getF32Constant(DAG, 0x3da235e3));
3605       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3606                                getF32Constant(DAG, 0x3e65b8f3));
3607       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3608       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3609                                getF32Constant(DAG, 0x3f324b07));
3610       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3611       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3612                                getF32Constant(DAG, 0x3f7ff8fd));
3613       SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3614       SDValue TwoToFractionalPartOfX =
3615         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3616
3617       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3618                            MVT::f32, TwoToFractionalPartOfX);
3619     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3620       // For floating-point precision of 18:
3621       //
3622       //   TwoToFractionalPartOfX =
3623       //     0.999999982f +
3624       //       (0.693148872f +
3625       //         (0.240227044f +
3626       //           (0.554906021e-1f +
3627       //             (0.961591928e-2f +
3628       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3629       // error 2.47208000*10^(-7), which is better than 18 bits
3630       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3631                                getF32Constant(DAG, 0x3924b03e));
3632       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3633                                getF32Constant(DAG, 0x3ab24b87));
3634       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3635       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3636                                getF32Constant(DAG, 0x3c1d8c17));
3637       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3638       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3639                                getF32Constant(DAG, 0x3d634a1d));
3640       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3641       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3642                                getF32Constant(DAG, 0x3e75fe14));
3643       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3644       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3645                                 getF32Constant(DAG, 0x3f317234));
3646       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3647       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3648                                 getF32Constant(DAG, 0x3f800000));
3649       SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3650       SDValue TwoToFractionalPartOfX =
3651         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3652
3653       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3654                            MVT::f32, TwoToFractionalPartOfX);
3655     }
3656   } else {
3657     // No special expansion.
3658     result = DAG.getNode(ISD::FEXP2, dl,
3659                          getValue(I.getArgOperand(0)).getValueType(),
3660                          getValue(I.getArgOperand(0)));
3661   }
3662
3663   setValue(&I, result);
3664 }
3665
3666 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
3667 /// limited-precision mode with x == 10.0f.
3668 void
3669 SelectionDAGBuilder::visitPow(const CallInst &I) {
3670   SDValue result;
3671   const Value *Val = I.getArgOperand(0);
3672   DebugLoc dl = getCurDebugLoc();
3673   bool IsExp10 = false;
3674
3675   if (getValue(Val).getValueType() == MVT::f32 &&
3676       getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
3677       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3678     if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
3679       if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
3680         APFloat Ten(10.0f);
3681         IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
3682       }
3683     }
3684   }
3685
3686   if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3687     SDValue Op = getValue(I.getArgOperand(1));
3688
3689     // Put the exponent in the right bit position for later addition to the
3690     // final result:
3691     //
3692     //   #define LOG2OF10 3.3219281f
3693     //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
3694     SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3695                              getF32Constant(DAG, 0x40549a78));
3696     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3697
3698     //   FractionalPartOfX = x - (float)IntegerPartOfX;
3699     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3700     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3701
3702     //   IntegerPartOfX <<= 23;
3703     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3704                                  DAG.getConstant(23, TLI.getPointerTy()));
3705
3706     if (LimitFloatPrecision <= 6) {
3707       // For floating-point precision of 6:
3708       //
3709       //   twoToFractionalPartOfX =
3710       //     0.997535578f +
3711       //       (0.735607626f + 0.252464424f * x) * x;
3712       //
3713       // error 0.0144103317, which is 6 bits
3714       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3715                                getF32Constant(DAG, 0x3e814304));
3716       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3717                                getF32Constant(DAG, 0x3f3c50c8));
3718       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3719       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3720                                getF32Constant(DAG, 0x3f7f5e7e));
3721       SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3722       SDValue TwoToFractionalPartOfX =
3723         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3724
3725       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3726                            MVT::f32, TwoToFractionalPartOfX);
3727     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3728       // For floating-point precision of 12:
3729       //
3730       //   TwoToFractionalPartOfX =
3731       //     0.999892986f +
3732       //       (0.696457318f +
3733       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3734       //
3735       // error 0.000107046256, which is 13 to 14 bits
3736       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3737                                getF32Constant(DAG, 0x3da235e3));
3738       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3739                                getF32Constant(DAG, 0x3e65b8f3));
3740       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3741       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3742                                getF32Constant(DAG, 0x3f324b07));
3743       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3744       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3745                                getF32Constant(DAG, 0x3f7ff8fd));
3746       SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3747       SDValue TwoToFractionalPartOfX =
3748         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3749
3750       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3751                            MVT::f32, TwoToFractionalPartOfX);
3752     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3753       // For floating-point precision of 18:
3754       //
3755       //   TwoToFractionalPartOfX =
3756       //     0.999999982f +
3757       //       (0.693148872f +
3758       //         (0.240227044f +
3759       //           (0.554906021e-1f +
3760       //             (0.961591928e-2f +
3761       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3762       // error 2.47208000*10^(-7), which is better than 18 bits
3763       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3764                                getF32Constant(DAG, 0x3924b03e));
3765       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3766                                getF32Constant(DAG, 0x3ab24b87));
3767       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3768       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3769                                getF32Constant(DAG, 0x3c1d8c17));
3770       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3771       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3772                                getF32Constant(DAG, 0x3d634a1d));
3773       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3774       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3775                                getF32Constant(DAG, 0x3e75fe14));
3776       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3777       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3778                                 getF32Constant(DAG, 0x3f317234));
3779       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3780       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3781                                 getF32Constant(DAG, 0x3f800000));
3782       SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3783       SDValue TwoToFractionalPartOfX =
3784         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3785
3786       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3787                            MVT::f32, TwoToFractionalPartOfX);
3788     }
3789   } else {
3790     // No special expansion.
3791     result = DAG.getNode(ISD::FPOW, dl,
3792                          getValue(I.getArgOperand(0)).getValueType(),
3793                          getValue(I.getArgOperand(0)),
3794                          getValue(I.getArgOperand(1)));
3795   }
3796
3797   setValue(&I, result);
3798 }
3799
3800
3801 /// ExpandPowI - Expand a llvm.powi intrinsic.
3802 static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
3803                           SelectionDAG &DAG) {
3804   // If RHS is a constant, we can expand this out to a multiplication tree,
3805   // otherwise we end up lowering to a call to __powidf2 (for example).  When
3806   // optimizing for size, we only want to do this if the expansion would produce
3807   // a small number of multiplies, otherwise we do the full expansion.
3808   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
3809     // Get the exponent as a positive value.
3810     unsigned Val = RHSC->getSExtValue();
3811     if ((int)Val < 0) Val = -Val;
3812
3813     // powi(x, 0) -> 1.0
3814     if (Val == 0)
3815       return DAG.getConstantFP(1.0, LHS.getValueType());
3816
3817     const Function *F = DAG.getMachineFunction().getFunction();
3818     if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
3819         // If optimizing for size, don't insert too many multiplies.  This
3820         // inserts up to 5 multiplies.
3821         CountPopulation_32(Val)+Log2_32(Val) < 7) {
3822       // We use the simple binary decomposition method to generate the multiply
3823       // sequence.  There are more optimal ways to do this (for example,
3824       // powi(x,15) generates one more multiply than it should), but this has
3825       // the benefit of being both really simple and much better than a libcall.
3826       SDValue Res;  // Logically starts equal to 1.0
3827       SDValue CurSquare = LHS;
3828       while (Val) {
3829         if (Val & 1) {
3830           if (Res.getNode())
3831             Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
3832           else
3833             Res = CurSquare;  // 1.0*CurSquare.
3834         }
3835
3836         CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
3837                                 CurSquare, CurSquare);
3838         Val >>= 1;
3839       }
3840
3841       // If the original was negative, invert the result, producing 1/(x*x*x).
3842       if (RHSC->getSExtValue() < 0)
3843         Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
3844                           DAG.getConstantFP(1.0, LHS.getValueType()), Res);
3845       return Res;
3846     }
3847   }
3848
3849   // Otherwise, expand to a libcall.
3850   return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
3851 }
3852
3853 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
3854 /// argument, create the corresponding DBG_VALUE machine instruction for it now.
3855 /// At the end of instruction selection, they will be inserted to the entry BB.
3856 bool
3857 SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
3858                                               const Value *V, MDNode *Variable,
3859                                               uint64_t Offset,
3860                                               const SDValue &N) {
3861   if (!isa<Argument>(V))
3862     return false;
3863
3864   MachineFunction &MF = DAG.getMachineFunction();
3865   // Ignore inlined function arguments here.
3866   DIVariable DV(Variable);
3867   if (DV.isInlinedFnArgument(MF.getFunction()))
3868     return false;
3869
3870   MachineBasicBlock *MBB = FuncInfo.MBB;
3871   if (MBB != &MF.front())
3872     return false;
3873
3874   unsigned Reg = 0;
3875   if (N.getOpcode() == ISD::CopyFromReg) {
3876     Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
3877     if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
3878       MachineRegisterInfo &RegInfo = MF.getRegInfo();
3879       unsigned PR = RegInfo.getLiveInPhysReg(Reg);
3880       if (PR)
3881         Reg = PR;
3882     }
3883   }
3884
3885   if (!Reg) {
3886     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
3887     if (VMI == FuncInfo.ValueMap.end())
3888       return false;
3889     Reg = VMI->second;
3890   }
3891
3892   const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
3893   MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
3894                                     TII->get(TargetOpcode::DBG_VALUE))
3895     .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
3896   FuncInfo.ArgDbgValues.push_back(&*MIB);
3897   return true;
3898 }
3899
// VisualStudio defines setjmp as _setjmp.  When that macro is active, remove
// it so that it does not rewrite the Intrinsic::setjmp case label used in
// visitIntrinsicCall below.  setjmp_undefined_for_visual_studio records that
// the macro was removed here (presumably so it can be restored later in the
// file -- confirm against the end of the file).
#if defined(_MSC_VER) && defined(setjmp)
#define setjmp_undefined_for_visual_studio
#undef setjmp
#endif
3905
3906 /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
3907 /// we want to emit this as a call to a named external function, return the name
3908 /// otherwise lower it and return null.
3909 const char *
3910 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
3911   DebugLoc dl = getCurDebugLoc();
3912   SDValue Res;
3913
3914   switch (Intrinsic) {
3915   default:
3916     // By default, turn this into a target intrinsic node.
3917     visitTargetIntrinsic(I, Intrinsic);
3918     return 0;
3919   case Intrinsic::vastart:  visitVAStart(I); return 0;
3920   case Intrinsic::vaend:    visitVAEnd(I); return 0;
3921   case Intrinsic::vacopy:   visitVACopy(I); return 0;
3922   case Intrinsic::returnaddress:
3923     setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
3924                              getValue(I.getArgOperand(0))));
3925     return 0;
3926   case Intrinsic::frameaddress:
3927     setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
3928                              getValue(I.getArgOperand(0))));
3929     return 0;
3930   case Intrinsic::setjmp:
3931     return "_setjmp"+!TLI.usesUnderscoreSetJmp();
3932   case Intrinsic::longjmp:
3933     return "_longjmp"+!TLI.usesUnderscoreLongJmp();
3934   case Intrinsic::memcpy: {
3935     // Assert for address < 256 since we support only user defined address
3936     // spaces.
3937     assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
3938            < 256 &&
3939            cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
3940            < 256 &&
3941            "Unknown address space");
3942     SDValue Op1 = getValue(I.getArgOperand(0));
3943     SDValue Op2 = getValue(I.getArgOperand(1));
3944     SDValue Op3 = getValue(I.getArgOperand(2));
3945     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
3946     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
3947     DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
3948                               I.getArgOperand(0), 0, I.getArgOperand(1), 0));
3949     return 0;
3950   }
3951   case Intrinsic::memset: {
3952     // Assert for address < 256 since we support only user defined address
3953     // spaces.
3954     assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
3955            < 256 &&
3956            "Unknown address space");
3957     SDValue Op1 = getValue(I.getArgOperand(0));
3958     SDValue Op2 = getValue(I.getArgOperand(1));
3959     SDValue Op3 = getValue(I.getArgOperand(2));
3960     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
3961     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
3962     DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
3963                               I.getArgOperand(0), 0));
3964     return 0;
3965   }
3966   case Intrinsic::memmove: {
3967     // Assert for address < 256 since we support only user defined address
3968     // spaces.
3969     assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
3970            < 256 &&
3971            cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
3972            < 256 &&
3973            "Unknown address space");
3974     SDValue Op1 = getValue(I.getArgOperand(0));
3975     SDValue Op2 = getValue(I.getArgOperand(1));
3976     SDValue Op3 = getValue(I.getArgOperand(2));
3977     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
3978     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
3979
3980     // If the source and destination are known to not be aliases, we can
3981     // lower memmove as memcpy.
3982     uint64_t Size = -1ULL;
3983     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
3984       Size = C->getZExtValue();
3985     if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
3986         AliasAnalysis::NoAlias) {
3987       DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
3988                                 false, I.getArgOperand(0), 0,
3989                                 I.getArgOperand(1), 0));
3990       return 0;
3991     }
3992
3993     DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
3994                                I.getArgOperand(0), 0, I.getArgOperand(1), 0));
3995     return 0;
3996   }
3997   case Intrinsic::dbg_declare: {
3998     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
3999     if (!DIVariable(DI.getVariable()).Verify())
4000       return 0;
4001
4002     MDNode *Variable = DI.getVariable();
4003     // Parameters are handled specially.
4004     bool isParameter =
4005       DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
4006     const Value *Address = DI.getAddress();
4007     if (!Address)
4008       return 0;
4009     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
4010       Address = BCI->getOperand(0);
4011     const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
4012
4013     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
4014     // but do not always have a corresponding SDNode built.  The SDNodeOrder
4015     // absolute, but not relative, values are different depending on whether
4016     // debug info exists.
4017     ++SDNodeOrder;
4018     SDValue &N = NodeMap[Address];
4019     SDDbgValue *SDV;
4020     if (N.getNode()) {
4021       if (isParameter && !AI) {
4022         FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
4023         if (FINode)
4024           // Byval parameter.  We have a frame index at this point.
4025           SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
4026                                 0, dl, SDNodeOrder);
4027         else
4028           // Can't do anything with other non-AI cases yet.  This might be a
4029           // parameter of a callee function that got inlined, for example.
4030           return 0;
4031       } else if (AI)
4032         SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
4033                               0, dl, SDNodeOrder);
4034       else
4035         // Can't do anything with other non-AI cases yet.
4036         return 0;
4037       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
4038     } else {
4039       // This isn't useful, but it shows what we're missing.
4040       SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
4041                             0, dl, SDNodeOrder);
4042       DAG.AddDbgValue(SDV, 0, isParameter);
4043     }
4044     return 0;
4045   }
4046   case Intrinsic::dbg_value: {
4047     const DbgValueInst &DI = cast<DbgValueInst>(I);
4048     if (!DIVariable(DI.getVariable()).Verify())
4049       return 0;
4050
4051     MDNode *Variable = DI.getVariable();
4052     uint64_t Offset = DI.getOffset();
4053     const Value *V = DI.getValue();
4054     if (!V)
4055       return 0;
4056
4057     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
4058     // but do not always have a corresponding SDNode built.  The SDNodeOrder
4059     // absolute, but not relative, values are different depending on whether
4060     // debug info exists.
4061     ++SDNodeOrder;
4062     SDDbgValue *SDV;
4063     if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
4064       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
4065       DAG.AddDbgValue(SDV, 0, false);
4066     } else {
4067       bool createUndef = false;
4068       // Do not use getValue() in here; we don't want to generate code at
4069       // this point if it hasn't been done yet.
4070       SDValue N = NodeMap[V];
4071       if (!N.getNode() && isa<Argument>(V))
4072         // Check unused arguments map.
4073         N = UnusedArgNodeMap[V];
4074       if (N.getNode()) {
4075         if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
4076           SDV = DAG.getDbgValue(Variable, N.getNode(),
4077                                 N.getResNo(), Offset, dl, SDNodeOrder);
4078           DAG.AddDbgValue(SDV, N.getNode(), false);
4079         }
4080       } else if (isa<PHINode>(V) && !V->use_empty() ) {
4081         // Do not call getValue(V) yet, as we don't want to generate code.
4082         // Remember it for later.
4083         DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
4084         DanglingDebugInfoMap[V] = DDI;
4085       } else
4086         createUndef = true;
4087       if (createUndef) {
4088         // We may expand this to cover more cases.  One case where we have no
4089         // data available is an unreferenced parameter; we need this fallback.
4090         SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
4091                               Offset, dl, SDNodeOrder);
4092         DAG.AddDbgValue(SDV, 0, false);
4093       }
4094     }
4095
4096     // Build a debug info table entry.
4097     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
4098       V = BCI->getOperand(0);
4099     const AllocaInst *AI = dyn_cast<AllocaInst>(V);
4100     // Don't handle byval struct arguments or VLAs, for example.
4101     if (!AI)
4102       return 0;
4103     DenseMap<const AllocaInst*, int>::iterator SI =
4104       FuncInfo.StaticAllocaMap.find(AI);
4105     if (SI == FuncInfo.StaticAllocaMap.end())
4106       return 0; // VLAs.
4107     int FI = SI->second;
4108
4109     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4110     if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
4111       MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
4112     return 0;
4113   }
4114   case Intrinsic::eh_exception: {
4115     // Insert the EXCEPTIONADDR instruction.
4116     assert(FuncInfo.MBB->isLandingPad() &&
4117            "Call to eh.exception not in landing pad!");
4118     SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
4119     SDValue Ops[1];
4120     Ops[0] = DAG.getRoot();
4121     SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
4122     setValue(&I, Op);
4123     DAG.setRoot(Op.getValue(1));
4124     return 0;
4125   }
4126
4127   case Intrinsic::eh_selector: {
4128     MachineBasicBlock *CallMBB = FuncInfo.MBB;
4129     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4130     if (CallMBB->isLandingPad())
4131       AddCatchInfo(I, &MMI, CallMBB);
4132     else {
4133 #ifndef NDEBUG
4134       FuncInfo.CatchInfoLost.insert(&I);
4135 #endif
4136       // FIXME: Mark exception selector register as live in.  Hack for PR1508.
4137       unsigned Reg = TLI.getExceptionSelectorRegister();
4138       if (Reg) FuncInfo.MBB->addLiveIn(Reg);
4139     }
4140
4141     // Insert the EHSELECTION instruction.
4142     SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
4143     SDValue Ops[2];
4144     Ops[0] = getValue(I.getArgOperand(0));
4145     Ops[1] = getRoot();
4146     SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
4147     DAG.setRoot(Op.getValue(1));
4148     setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
4149     return 0;
4150   }
4151
4152   case Intrinsic::eh_typeid_for: {
4153     // Find the type id for the given typeinfo.
4154     GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
4155     unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
4156     Res = DAG.getConstant(TypeID, MVT::i32);
4157     setValue(&I, Res);
4158     return 0;
4159   }
4160
4161   case Intrinsic::eh_return_i32:
4162   case Intrinsic::eh_return_i64:
4163     DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
4164     DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
4165                             MVT::Other,
4166                             getControlRoot(),
4167                             getValue(I.getArgOperand(0)),
4168                             getValue(I.getArgOperand(1))));
4169     return 0;
4170   case Intrinsic::eh_unwind_init:
4171     DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
4172     return 0;
4173   case Intrinsic::eh_dwarf_cfa: {
4174     SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
4175                                         TLI.getPointerTy());
4176     SDValue Offset = DAG.getNode(ISD::ADD, dl,
4177                                  TLI.getPointerTy(),
4178                                  DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
4179                                              TLI.getPointerTy()),
4180                                  CfaArg);
4181     SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
4182                              TLI.getPointerTy(),
4183                              DAG.getConstant(0, TLI.getPointerTy()));
4184     setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
4185                              FA, Offset));
4186     return 0;
4187   }
4188   case Intrinsic::eh_sjlj_callsite: {
4189     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4190     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
4191     assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
4192     assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
4193
4194     MMI.setCurrentCallSite(CI->getZExtValue());
4195     return 0;
4196   }
4197   case Intrinsic::eh_sjlj_setjmp: {
4198     setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
4199                              getValue(I.getArgOperand(0))));
4200     return 0;
4201   }
4202   case Intrinsic::eh_sjlj_longjmp: {
4203     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
4204                             getRoot(),
4205                             getValue(I.getArgOperand(0))));
4206     return 0;
4207   }
4208
4209   case Intrinsic::convertff:
4210   case Intrinsic::convertfsi:
4211   case Intrinsic::convertfui:
4212   case Intrinsic::convertsif:
4213   case Intrinsic::convertuif:
4214   case Intrinsic::convertss:
4215   case Intrinsic::convertsu:
4216   case Intrinsic::convertus:
4217   case Intrinsic::convertuu: {
4218     ISD::CvtCode Code = ISD::CVT_INVALID;
4219     switch (Intrinsic) {
4220     case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
4221     case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
4222     case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
4223     case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
4224     case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
4225     case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
4226     case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
4227     case Intrinsic::convertus:  Code = ISD::CVT_US; break;
4228     case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
4229     }
4230     EVT DestVT = TLI.getValueType(I.getType());
4231     const Value *Op1 = I.getArgOperand(0);
4232     Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
4233                                DAG.getValueType(DestVT),
4234                                DAG.getValueType(getValue(Op1).getValueType()),
4235                                getValue(I.getArgOperand(1)),
4236                                getValue(I.getArgOperand(2)),
4237                                Code);
4238     setValue(&I, Res);
4239     return 0;
4240   }
4241   case Intrinsic::sqrt:
4242     setValue(&I, DAG.getNode(ISD::FSQRT, dl,
4243                              getValue(I.getArgOperand(0)).getValueType(),
4244                              getValue(I.getArgOperand(0))));
4245     return 0;
4246   case Intrinsic::powi:
4247     setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
4248                             getValue(I.getArgOperand(1)), DAG));
4249     return 0;
4250   case Intrinsic::sin:
4251     setValue(&I, DAG.getNode(ISD::FSIN, dl,
4252                              getValue(I.getArgOperand(0)).getValueType(),
4253                              getValue(I.getArgOperand(0))));
4254     return 0;
4255   case Intrinsic::cos:
4256     setValue(&I, DAG.getNode(ISD::FCOS, dl,
4257                              getValue(I.getArgOperand(0)).getValueType(),
4258                              getValue(I.getArgOperand(0))));
4259     return 0;
4260   case Intrinsic::log:
4261     visitLog(I);
4262     return 0;
4263   case Intrinsic::log2:
4264     visitLog2(I);
4265     return 0;
4266   case Intrinsic::log10:
4267     visitLog10(I);
4268     return 0;
4269   case Intrinsic::exp:
4270     visitExp(I);
4271     return 0;
4272   case Intrinsic::exp2:
4273     visitExp2(I);
4274     return 0;
4275   case Intrinsic::pow:
4276     visitPow(I);
4277     return 0;
4278   case Intrinsic::convert_to_fp16:
4279     setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
4280                              MVT::i16, getValue(I.getArgOperand(0))));
4281     return 0;
4282   case Intrinsic::convert_from_fp16:
4283     setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
4284                              MVT::f32, getValue(I.getArgOperand(0))));
4285     return 0;
4286   case Intrinsic::pcmarker: {
4287     SDValue Tmp = getValue(I.getArgOperand(0));
4288     DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
4289     return 0;
4290   }
4291   case Intrinsic::readcyclecounter: {
4292     SDValue Op = getRoot();
4293     Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
4294                       DAG.getVTList(MVT::i64, MVT::Other),
4295                       &Op, 1);
4296     setValue(&I, Res);
4297     DAG.setRoot(Res.getValue(1));
4298     return 0;
4299   }
4300   case Intrinsic::bswap:
4301     setValue(&I, DAG.getNode(ISD::BSWAP, dl,
4302                              getValue(I.getArgOperand(0)).getValueType(),
4303                              getValue(I.getArgOperand(0))));
4304     return 0;
4305   case Intrinsic::cttz: {
4306     SDValue Arg = getValue(I.getArgOperand(0));
4307     EVT Ty = Arg.getValueType();
4308     setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
4309     return 0;
4310   }
4311   case Intrinsic::ctlz: {
4312     SDValue Arg = getValue(I.getArgOperand(0));
4313     EVT Ty = Arg.getValueType();
4314     setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
4315     return 0;
4316   }
4317   case Intrinsic::ctpop: {
4318     SDValue Arg = getValue(I.getArgOperand(0));
4319     EVT Ty = Arg.getValueType();
4320     setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
4321     return 0;
4322   }
4323   case Intrinsic::stacksave: {
4324     SDValue Op = getRoot();
4325     Res = DAG.getNode(ISD::STACKSAVE, dl,
4326                       DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
4327     setValue(&I, Res);
4328     DAG.setRoot(Res.getValue(1));
4329     return 0;
4330   }
4331   case Intrinsic::stackrestore: {
4332     Res = getValue(I.getArgOperand(0));
4333     DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
4334     return 0;
4335   }
4336   case Intrinsic::stackprotector: {
4337     // Emit code into the DAG to store the stack guard onto the stack.
4338     MachineFunction &MF = DAG.getMachineFunction();
4339     MachineFrameInfo *MFI = MF.getFrameInfo();
4340     EVT PtrTy = TLI.getPointerTy();
4341
4342     SDValue Src = getValue(I.getArgOperand(0));   // The guard's value.
4343     AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
4344
4345     int FI = FuncInfo.StaticAllocaMap[Slot];
4346     MFI->setStackProtectorIndex(FI);
4347
4348     SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
4349
4350     // Store the stack protector onto the stack.
4351     Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
4352                        PseudoSourceValue::getFixedStack(FI),
4353                        0, true, false, 0);
4354     setValue(&I, Res);
4355     DAG.setRoot(Res);
4356     return 0;
4357   }
4358   case Intrinsic::objectsize: {
4359     // If we don't know by now, we're never going to know.
4360     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
4361
4362     assert(CI && "Non-constant type in __builtin_object_size?");
4363
4364     SDValue Arg = getValue(I.getCalledValue());
4365     EVT Ty = Arg.getValueType();
4366
4367     if (CI->isZero())
4368       Res = DAG.getConstant(-1ULL, Ty);
4369     else
4370       Res = DAG.getConstant(0, Ty);
4371
4372     setValue(&I, Res);
4373     return 0;
4374   }
4375   case Intrinsic::var_annotation:
4376     // Discard annotate attributes
4377     return 0;
4378
4379   case Intrinsic::init_trampoline: {
4380     const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
4381
4382     SDValue Ops[6];
4383     Ops[0] = getRoot();
4384     Ops[1] = getValue(I.getArgOperand(0));
4385     Ops[2] = getValue(I.getArgOperand(1));
4386     Ops[3] = getValue(I.getArgOperand(2));
4387     Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
4388     Ops[5] = DAG.getSrcValue(F);
4389
4390     Res = DAG.getNode(ISD::TRAMPOLINE, dl,
4391                       DAG.getVTList(TLI.getPointerTy(), MVT::Other),
4392                       Ops, 6);
4393
4394     setValue(&I, Res);
4395     DAG.setRoot(Res.getValue(1));
4396     return 0;
4397   }
4398   case Intrinsic::gcroot:
4399     if (GFI) {
4400       const Value *Alloca = I.getArgOperand(0);
4401       const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
4402
4403       FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
4404       GFI->addStackRoot(FI->getIndex(), TypeMap);
4405     }
4406     return 0;
4407   case Intrinsic::gcread:
4408   case Intrinsic::gcwrite:
4409     llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
4410     return 0;
4411   case Intrinsic::flt_rounds:
4412     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
4413     return 0;
4414   case Intrinsic::trap:
4415     DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
4416     return 0;
4417   case Intrinsic::uadd_with_overflow:
4418     return implVisitAluOverflow(I, ISD::UADDO);
4419   case Intrinsic::sadd_with_overflow:
4420     return implVisitAluOverflow(I, ISD::SADDO);
4421   case Intrinsic::usub_with_overflow:
4422     return implVisitAluOverflow(I, ISD::USUBO);
4423   case Intrinsic::ssub_with_overflow:
4424     return implVisitAluOverflow(I, ISD::SSUBO);
4425   case Intrinsic::umul_with_overflow:
4426     return implVisitAluOverflow(I, ISD::UMULO);
4427   case Intrinsic::smul_with_overflow:
4428     return implVisitAluOverflow(I, ISD::SMULO);
4429
4430   case Intrinsic::prefetch: {
4431     SDValue Ops[4];
4432     Ops[0] = getRoot();
4433     Ops[1] = getValue(I.getArgOperand(0));
4434     Ops[2] = getValue(I.getArgOperand(1));
4435     Ops[3] = getValue(I.getArgOperand(2));
4436     DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
4437     return 0;
4438   }
4439
4440   case Intrinsic::memory_barrier: {
4441     SDValue Ops[6];
4442     Ops[0] = getRoot();
4443     for (int x = 1; x < 6; ++x)
4444       Ops[x] = getValue(I.getArgOperand(x - 1));
4445
4446     DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
4447     return 0;
4448   }
4449   case Intrinsic::atomic_cmp_swap: {
4450     SDValue Root = getRoot();
4451     SDValue L =
4452       DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
4453                     getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
4454                     Root,
4455                     getValue(I.getArgOperand(0)),
4456                     getValue(I.getArgOperand(1)),
4457                     getValue(I.getArgOperand(2)),
4458                     I.getArgOperand(0));
4459     setValue(&I, L);
4460     DAG.setRoot(L.getValue(1));
4461     return 0;
4462   }
4463   case Intrinsic::atomic_load_add:
4464     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
4465   case Intrinsic::atomic_load_sub:
4466     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
4467   case Intrinsic::atomic_load_or:
4468     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
4469   case Intrinsic::atomic_load_xor:
4470     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
4471   case Intrinsic::atomic_load_and:
4472     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
4473   case Intrinsic::atomic_load_nand:
4474     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
4475   case Intrinsic::atomic_load_max:
4476     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
4477   case Intrinsic::atomic_load_min:
4478     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
4479   case Intrinsic::atomic_load_umin:
4480     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
4481   case Intrinsic::atomic_load_umax:
4482     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
4483   case Intrinsic::atomic_swap:
4484     return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
4485
4486   case Intrinsic::invariant_start:
4487   case Intrinsic::lifetime_start:
4488     // Discard region information.
4489     setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
4490     return 0;
4491   case Intrinsic::invariant_end:
4492   case Intrinsic::lifetime_end:
4493     // Discard region information.
4494     return 0;
4495   }
4496 }
4497
4498 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
4499                                       bool isTailCall,
4500                                       MachineBasicBlock *LandingPad) {
4501   const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
4502   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
4503   const Type *RetTy = FTy->getReturnType();
4504   MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4505   MCSymbol *BeginLabel = 0;
4506
4507   TargetLowering::ArgListTy Args;
4508   TargetLowering::ArgListEntry Entry;
4509   Args.reserve(CS.arg_size());
4510
4511   // Check whether the function can return without sret-demotion.
4512   SmallVector<ISD::OutputArg, 4> Outs;
4513   SmallVector<uint64_t, 4> Offsets;
4514   GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
4515                 Outs, TLI, &Offsets);
4516
4517   bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
4518                         FTy->isVarArg(), Outs, FTy->getContext());
4519
4520   SDValue DemoteStackSlot;
4521
4522   if (!CanLowerReturn) {
4523     uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
4524                       FTy->getReturnType());
4525     unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
4526                       FTy->getReturnType());
4527     MachineFunction &MF = DAG.getMachineFunction();
4528     int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
4529     const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
4530
4531     DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
4532     Entry.Node = DemoteStackSlot;
4533     Entry.Ty = StackSlotPtrType;
4534     Entry.isSExt = false;
4535     Entry.isZExt = false;
4536     Entry.isInReg = false;
4537     Entry.isSRet = true;
4538     Entry.isNest = false;
4539     Entry.isByVal = false;
4540     Entry.Alignment = Align;
4541     Args.push_back(Entry);
4542     RetTy = Type::getVoidTy(FTy->getContext());
4543   }
4544
4545   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
4546        i != e; ++i) {
4547     SDValue ArgNode = getValue(*i);
4548     Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
4549
4550     unsigned attrInd = i - CS.arg_begin() + 1;
4551     Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
4552     Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
4553     Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
4554     Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
4555     Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
4556     Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
4557     Entry.Alignment = CS.getParamAlignment(attrInd);
4558     Args.push_back(Entry);
4559   }
4560
4561   if (LandingPad) {
4562     // Insert a label before the invoke call to mark the try range.  This can be
4563     // used to detect deletion of the invoke via the MachineModuleInfo.
4564     BeginLabel = MMI.getContext().CreateTempSymbol();
4565
4566     // For SjLj, keep track of which landing pads go with which invokes
4567     // so as to maintain the ordering of pads in the LSDA.
4568     unsigned CallSiteIndex = MMI.getCurrentCallSite();
4569     if (CallSiteIndex) {
4570       MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
4571       // Now that the call site is handled, stop tracking it.
4572       MMI.setCurrentCallSite(0);
4573     }
4574
4575     // Both PendingLoads and PendingExports must be flushed here;
4576     // this call might not return.
4577     (void)getRoot();
4578     DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
4579   }
4580
4581   // Check if target-independent constraints permit a tail call here.
4582   // Target-dependent constraints are checked within TLI.LowerCallTo.
4583   if (isTailCall &&
4584       !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
4585     isTailCall = false;
4586
4587   std::pair<SDValue,SDValue> Result =
4588     TLI.LowerCallTo(getRoot(), RetTy,
4589                     CS.paramHasAttr(0, Attribute::SExt),
4590                     CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
4591                     CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
4592                     CS.getCallingConv(),
4593                     isTailCall,
4594                     !CS.getInstruction()->use_empty(),
4595                     Callee, Args, DAG, getCurDebugLoc());
4596   assert((isTailCall || Result.second.getNode()) &&
4597          "Non-null chain expected with non-tail call!");
4598   assert((Result.second.getNode() || !Result.first.getNode()) &&
4599          "Null value expected with tail call!");
4600   if (Result.first.getNode()) {
4601     setValue(CS.getInstruction(), Result.first);
4602   } else if (!CanLowerReturn && Result.second.getNode()) {
4603     // The instruction result is the result of loading from the
4604     // hidden sret parameter.
4605     SmallVector<EVT, 1> PVTs;
4606     const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
4607
4608     ComputeValueVTs(TLI, PtrRetTy, PVTs);
4609     assert(PVTs.size() == 1 && "Pointers should fit in one register");
4610     EVT PtrVT = PVTs[0];
4611     unsigned NumValues = Outs.size();
4612     SmallVector<SDValue, 4> Values(NumValues);
4613     SmallVector<SDValue, 4> Chains(NumValues);
4614
4615     for (unsigned i = 0; i < NumValues; ++i) {
4616       SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
4617                                 DemoteStackSlot,
4618                                 DAG.getConstant(Offsets[i], PtrVT));
4619       SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
4620                               Add, NULL, Offsets[i], false, false, 1);
4621       Values[i] = L;
4622       Chains[i] = L.getValue(1);
4623     }
4624
4625     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
4626                                 MVT::Other, &Chains[0], NumValues);
4627     PendingLoads.push_back(Chain);
4628
4629     // Collect the legal value parts into potentially illegal values
4630     // that correspond to the original function's return values.
4631     SmallVector<EVT, 4> RetTys;
4632     RetTy = FTy->getReturnType();
4633     ComputeValueVTs(TLI, RetTy, RetTys);
4634     ISD::NodeType AssertOp = ISD::DELETED_NODE;
4635     SmallVector<SDValue, 4> ReturnValues;
4636     unsigned CurReg = 0;
4637     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
4638       EVT VT = RetTys[I];
4639       EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
4640       unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
4641
4642       SDValue ReturnValue =
4643         getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
4644                          RegisterVT, VT, AssertOp);
4645       ReturnValues.push_back(ReturnValue);
4646       CurReg += NumRegs;
4647     }
4648
4649     setValue(CS.getInstruction(),
4650              DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
4651                          DAG.getVTList(&RetTys[0], RetTys.size()),
4652                          &ReturnValues[0], ReturnValues.size()));
4653
4654   }
4655
4656   // As a special case, a null chain means that a tail call has been emitted and
4657   // the DAG root is already updated.
4658   if (Result.second.getNode())
4659     DAG.setRoot(Result.second);
4660   else
4661     HasTailCall = true;
4662
4663   if (LandingPad) {
4664     // Insert a label at the end of the invoke call to mark the try range.  This
4665     // can be used to detect deletion of the invoke via the MachineModuleInfo.
4666     MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
4667     DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
4668
4669     // Inform MachineModuleInfo of range.
4670     MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
4671   }
4672 }
4673
4674 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
4675 /// value is equal or not-equal to zero.
4676 static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
4677   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
4678        UI != E; ++UI) {
4679     if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
4680       if (IC->isEquality())
4681         if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
4682           if (C->isNullValue())
4683             continue;
4684     // Unknown instruction.
4685     return false;
4686   }
4687   return true;
4688 }
4689
4690 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
4691                              const Type *LoadTy,
4692                              SelectionDAGBuilder &Builder) {
4693
4694   // Check to see if this load can be trivially constant folded, e.g. if the
4695   // input is from a string literal.
4696   if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
4697     // Cast pointer to the type we really want to load.
4698     LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
4699                                          PointerType::getUnqual(LoadTy));
4700
4701     if (const Constant *LoadCst =
4702           ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
4703                                        Builder.TD))
4704       return Builder.getValue(LoadCst);
4705   }
4706
4707   // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
4708   // still constant memory, the input chain can be the entry node.
4709   SDValue Root;
4710   bool ConstantMemory = false;
4711
4712   // Do not serialize (non-volatile) loads of constant memory with anything.
4713   if (Builder.AA->pointsToConstantMemory(PtrVal)) {
4714     Root = Builder.DAG.getEntryNode();
4715     ConstantMemory = true;
4716   } else {
4717     // Do not serialize non-volatile loads against each other.
4718     Root = Builder.DAG.getRoot();
4719   }
4720
4721   SDValue Ptr = Builder.getValue(PtrVal);
4722   SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
4723                                         Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
4724                                         false /*volatile*/,
4725                                         false /*nontemporal*/, 1 /* align=1 */);
4726
4727   if (!ConstantMemory)
4728     Builder.PendingLoads.push_back(LoadVal.getValue(1));
4729   return LoadVal;
4730 }
4731
4732
4733 /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
4734 /// If so, return true and lower it, otherwise return false and it will be
4735 /// lowered like a normal call.
4736 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
4737   // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
4738   if (I.getNumArgOperands() != 3)
4739     return false;
4740
4741   const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
4742   if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
4743       !I.getArgOperand(2)->getType()->isIntegerTy() ||
4744       !I.getType()->isIntegerTy())
4745     return false;
4746
4747   const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
4748
4749   // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
4750   // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
4751   if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
4752     bool ActuallyDoIt = true;
4753     MVT LoadVT;
4754     const Type *LoadTy;
4755     switch (Size->getZExtValue()) {
4756     default:
4757       LoadVT = MVT::Other;
4758       LoadTy = 0;
4759       ActuallyDoIt = false;
4760       break;
4761     case 2:
4762       LoadVT = MVT::i16;
4763       LoadTy = Type::getInt16Ty(Size->getContext());
4764       break;
4765     case 4:
4766       LoadVT = MVT::i32;
4767       LoadTy = Type::getInt32Ty(Size->getContext());
4768       break;
4769     case 8:
4770       LoadVT = MVT::i64;
4771       LoadTy = Type::getInt64Ty(Size->getContext());
4772       break;
4773         /*
4774     case 16:
4775       LoadVT = MVT::v4i32;
4776       LoadTy = Type::getInt32Ty(Size->getContext());
4777       LoadTy = VectorType::get(LoadTy, 4);
4778       break;
4779          */
4780     }
4781
4782     // This turns into unaligned loads.  We only do this if the target natively
4783     // supports the MVT we'll be loading or if it is small enough (<= 4) that
4784     // we'll only produce a small number of byte loads.
4785
4786     // Require that we can find a legal MVT, and only do this if the target
4787     // supports unaligned loads of that type.  Expanding into byte loads would
4788     // bloat the code.
4789     if (ActuallyDoIt && Size->getZExtValue() > 4) {
4790       // TODO: Handle 5 byte compare as 4-byte + 1 byte.
4791       // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
4792       if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
4793         ActuallyDoIt = false;
4794     }
4795
4796     if (ActuallyDoIt) {
4797       SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
4798       SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
4799
4800       SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
4801                                  ISD::SETNE);
4802       EVT CallVT = TLI.getValueType(I.getType(), true);
4803       setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
4804       return true;
4805     }
4806   }
4807
4808
4809   return false;
4810 }
4811
4812
4813 void SelectionDAGBuilder::visitCall(const CallInst &I) {
4814   // Handle inline assembly differently.
4815   if (isa<InlineAsm>(I.getCalledValue())) {
4816     visitInlineAsm(&I);
4817     return;
4818   }
4819
4820   const char *RenameFn = 0;
4821   if (Function *F = I.getCalledFunction()) {
4822     if (F->isDeclaration()) {
4823       if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
4824         if (unsigned IID = II->getIntrinsicID(F)) {
4825           RenameFn = visitIntrinsicCall(I, IID);
4826           if (!RenameFn)
4827             return;
4828         }
4829       }
4830       if (unsigned IID = F->getIntrinsicID()) {
4831         RenameFn = visitIntrinsicCall(I, IID);
4832         if (!RenameFn)
4833           return;
4834       }
4835     }
4836
4837     // Check for well-known libc/libm calls.  If the function is internal, it
4838     // can't be a library call.
4839     if (!F->hasLocalLinkage() && F->hasName()) {
4840       StringRef Name = F->getName();
4841       if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
4842         if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
4843             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4844             I.getType() == I.getArgOperand(0)->getType() &&
4845             I.getType() == I.getArgOperand(1)->getType()) {
4846           SDValue LHS = getValue(I.getArgOperand(0));
4847           SDValue RHS = getValue(I.getArgOperand(1));
4848           setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
4849                                    LHS.getValueType(), LHS, RHS));
4850           return;
4851         }
4852       } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
4853         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4854             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4855             I.getType() == I.getArgOperand(0)->getType()) {
4856           SDValue Tmp = getValue(I.getArgOperand(0));
4857           setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
4858                                    Tmp.getValueType(), Tmp));
4859           return;
4860         }
4861       } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
4862         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4863             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4864             I.getType() == I.getArgOperand(0)->getType() &&
4865             I.onlyReadsMemory()) {
4866           SDValue Tmp = getValue(I.getArgOperand(0));
4867           setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
4868                                    Tmp.getValueType(), Tmp));
4869           return;
4870         }
4871       } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
4872         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4873             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4874             I.getType() == I.getArgOperand(0)->getType() &&
4875             I.onlyReadsMemory()) {
4876           SDValue Tmp = getValue(I.getArgOperand(0));
4877           setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
4878                                    Tmp.getValueType(), Tmp));
4879           return;
4880         }
4881       } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
4882         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4883             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4884             I.getType() == I.getArgOperand(0)->getType() &&
4885             I.onlyReadsMemory()) {
4886           SDValue Tmp = getValue(I.getArgOperand(0));
4887           setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
4888                                    Tmp.getValueType(), Tmp));
4889           return;
4890         }
4891       } else if (Name == "memcmp") {
4892         if (visitMemCmpCall(I))
4893           return;
4894       }
4895     }
4896   }
4897
4898   SDValue Callee;
4899   if (!RenameFn)
4900     Callee = getValue(I.getCalledValue());
4901   else
4902     Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
4903
4904   // Check if we can potentially perform a tail call. More detailed checking is
4905   // be done within LowerCallTo, after more information about the call is known.
4906   LowerCallTo(&I, Callee, I.isTailCall());
4907 }
4908
4909 namespace llvm {
4910
4911 /// AsmOperandInfo - This contains information for each constraint that we are
4912 /// lowering.
4913 class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
4914     public TargetLowering::AsmOperandInfo {
4915 public:
4916   /// CallOperand - If this is the result output operand or a clobber
4917   /// this is null, otherwise it is the incoming operand to the CallInst.
4918   /// This gets modified as the asm is processed.
4919   SDValue CallOperand;
4920
4921   /// AssignedRegs - If this is a register or register class operand, this
4922   /// contains the set of register corresponding to the operand.
4923   RegsForValue AssignedRegs;
4924
4925   explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
4926     : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
4927   }
4928
4929   /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
4930   /// busy in OutputRegs/InputRegs.
4931   void MarkAllocatedRegs(bool isOutReg, bool isInReg,
4932                          std::set<unsigned> &OutputRegs,
4933                          std::set<unsigned> &InputRegs,
4934                          const TargetRegisterInfo &TRI) const {
4935     if (isOutReg) {
4936       for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
4937         MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
4938     }
4939     if (isInReg) {
4940       for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
4941         MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
4942     }
4943   }
4944
4945   /// getCallOperandValEVT - Return the EVT of the Value* that this operand
4946   /// corresponds to.  If there is no Value* for this operand, it returns
4947   /// MVT::Other.
4948   EVT getCallOperandValEVT(LLVMContext &Context,
4949                            const TargetLowering &TLI,
4950                            const TargetData *TD) const {
4951     if (CallOperandVal == 0) return MVT::Other;
4952
4953     if (isa<BasicBlock>(CallOperandVal))
4954       return TLI.getPointerTy();
4955
4956     const llvm::Type *OpTy = CallOperandVal->getType();
4957
4958     // If this is an indirect operand, the operand is a pointer to the
4959     // accessed type.
4960     if (isIndirect) {
4961       const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4962       if (!PtrTy)
4963         report_fatal_error("Indirect operand for inline asm not a pointer!");
4964       OpTy = PtrTy->getElementType();
4965     }
4966
4967     // If OpTy is not a single value, it may be a struct/union that we
4968     // can tile with integers.
4969     if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4970       unsigned BitSize = TD->getTypeSizeInBits(OpTy);
4971       switch (BitSize) {
4972       default: break;
4973       case 1:
4974       case 8:
4975       case 16:
4976       case 32:
4977       case 64:
4978       case 128:
4979         OpTy = IntegerType::get(Context, BitSize);
4980         break;
4981       }
4982     }
4983
4984     return TLI.getValueType(OpTy, true);
4985   }
4986
4987 private:
4988   /// MarkRegAndAliases - Mark the specified register and all aliases in the
4989   /// specified set.
4990   static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
4991                                 const TargetRegisterInfo &TRI) {
4992     assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
4993     Regs.insert(Reg);
4994     if (const unsigned *Aliases = TRI.getAliasSet(Reg))
4995       for (; *Aliases; ++Aliases)
4996         Regs.insert(*Aliases);
4997   }
4998 };
4999
5000 } // end llvm namespace.
5001
5002 /// isAllocatableRegister - If the specified register is safe to allocate,
5003 /// i.e. it isn't a stack pointer or some other special register, return the
5004 /// register class for the register.  Otherwise, return null.
5005 static const TargetRegisterClass *
5006 isAllocatableRegister(unsigned Reg, MachineFunction &MF,
5007                       const TargetLowering &TLI,
5008                       const TargetRegisterInfo *TRI) {
5009   EVT FoundVT = MVT::Other;
5010   const TargetRegisterClass *FoundRC = 0;
5011   for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
5012        E = TRI->regclass_end(); RCI != E; ++RCI) {
5013     EVT ThisVT = MVT::Other;
5014
5015     const TargetRegisterClass *RC = *RCI;
5016     // If none of the value types for this register class are valid, we
5017     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5018     for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
5019          I != E; ++I) {
5020       if (TLI.isTypeLegal(*I)) {
5021         // If we have already found this register in a different register class,
5022         // choose the one with the largest VT specified.  For example, on
5023         // PowerPC, we favor f64 register classes over f32.
5024         if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
5025           ThisVT = *I;
5026           break;
5027         }
5028       }
5029     }
5030
5031     if (ThisVT == MVT::Other) continue;
5032
5033     // NOTE: This isn't ideal.  In particular, this might allocate the
5034     // frame pointer in functions that need it (due to them not being taken
5035     // out of allocation, because a variable sized allocation hasn't been seen
5036     // yet).  This is a slight code pessimization, but should still work.
5037     for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
5038          E = RC->allocation_order_end(MF); I != E; ++I)
5039       if (*I == Reg) {
5040         // We found a matching register class.  Keep looking at others in case
5041         // we find one with larger registers that this physreg is also in.
5042         FoundRC = RC;
5043         FoundVT = ThisVT;
5044         break;
5045       }
5046   }
5047   return FoundRC;
5048 }
5049
5050 /// GetRegistersForValue - Assign registers (virtual or physical) for the
5051 /// specified operand.  We prefer to assign virtual registers, to allow the
5052 /// register allocator to handle the assignment process.  However, if the asm
5053 /// uses features that we can't model on machineinstrs, we have SDISel do the
5054 /// allocation.  This produces generally horrible, but correct, code.
5055 ///
5056 ///   OpInfo describes the operand.
5057 ///   Input and OutputRegs are the set of already allocated physical registers.
5058 ///
5059 void SelectionDAGBuilder::
5060 GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
5061                      std::set<unsigned> &OutputRegs,
5062                      std::set<unsigned> &InputRegs) {
5063   LLVMContext &Context = FuncInfo.Fn->getContext();
5064
5065   // Compute whether this value requires an input register, an output register,
5066   // or both.
5067   bool isOutReg = false;
5068   bool isInReg = false;
5069   switch (OpInfo.Type) {
5070   case InlineAsm::isOutput:
5071     isOutReg = true;
5072
5073     // If there is an input constraint that matches this, we need to reserve
5074     // the input register so no other inputs allocate to it.
5075     isInReg = OpInfo.hasMatchingInput();
5076     break;
5077   case InlineAsm::isInput:
5078     isInReg = true;
5079     isOutReg = false;
5080     break;
5081   case InlineAsm::isClobber:
5082     isOutReg = true;
5083     isInReg = true;
5084     break;
5085   }
5086
5087
5088   MachineFunction &MF = DAG.getMachineFunction();
5089   SmallVector<unsigned, 4> Regs;
5090
5091   // If this is a constraint for a single physreg, or a constraint for a
5092   // register class, find it.
5093   std::pair<unsigned, const TargetRegisterClass*> PhysReg =
5094     TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
5095                                      OpInfo.ConstraintVT);
5096
5097   unsigned NumRegs = 1;
5098   if (OpInfo.ConstraintVT != MVT::Other) {
5099     // If this is a FP input in an integer register (or visa versa) insert a bit
5100     // cast of the input value.  More generally, handle any case where the input
5101     // value disagrees with the register class we plan to stick this in.
5102     if (OpInfo.Type == InlineAsm::isInput &&
5103         PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
5104       // Try to convert to the first EVT that the reg class contains.  If the
5105       // types are identical size, use a bitcast to convert (e.g. two differing
5106       // vector types).
5107       EVT RegVT = *PhysReg.second->vt_begin();
5108       if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
5109         OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5110                                          RegVT, OpInfo.CallOperand);
5111         OpInfo.ConstraintVT = RegVT;
5112       } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
5113         // If the input is a FP value and we want it in FP registers, do a
5114         // bitcast to the corresponding integer type.  This turns an f64 value
5115         // into i64, which can be passed with two i32 values on a 32-bit
5116         // machine.
5117         RegVT = EVT::getIntegerVT(Context,
5118                                   OpInfo.ConstraintVT.getSizeInBits());
5119         OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5120                                          RegVT, OpInfo.CallOperand);
5121         OpInfo.ConstraintVT = RegVT;
5122       }
5123     }
5124
5125     NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
5126   }
5127
5128   EVT RegVT;
5129   EVT ValueVT = OpInfo.ConstraintVT;
5130
5131   // If this is a constraint for a specific physical register, like {r17},
5132   // assign it now.
5133   if (unsigned AssignedReg = PhysReg.first) {
5134     const TargetRegisterClass *RC = PhysReg.second;
5135     if (OpInfo.ConstraintVT == MVT::Other)
5136       ValueVT = *RC->vt_begin();
5137
5138     // Get the actual register value type.  This is important, because the user
5139     // may have asked for (e.g.) the AX register in i32 type.  We need to
5140     // remember that AX is actually i16 to get the right extension.
5141     RegVT = *RC->vt_begin();
5142
5143     // This is a explicit reference to a physical register.
5144     Regs.push_back(AssignedReg);
5145
5146     // If this is an expanded reference, add the rest of the regs to Regs.
5147     if (NumRegs != 1) {
5148       TargetRegisterClass::iterator I = RC->begin();
5149       for (; *I != AssignedReg; ++I)
5150         assert(I != RC->end() && "Didn't find reg!");
5151
5152       // Already added the first reg.
5153       --NumRegs; ++I;
5154       for (; NumRegs; --NumRegs, ++I) {
5155         assert(I != RC->end() && "Ran out of registers to allocate!");
5156         Regs.push_back(*I);
5157       }
5158     }
5159
5160     OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
5161     const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
5162     OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
5163     return;
5164   }
5165
5166   // Otherwise, if this was a reference to an LLVM register class, create vregs
5167   // for this reference.
5168   if (const TargetRegisterClass *RC = PhysReg.second) {
5169     RegVT = *RC->vt_begin();
5170     if (OpInfo.ConstraintVT == MVT::Other)
5171       ValueVT = RegVT;
5172
5173     // Create the appropriate number of virtual registers.
5174     MachineRegisterInfo &RegInfo = MF.getRegInfo();
5175     for (; NumRegs; --NumRegs)
5176       Regs.push_back(RegInfo.createVirtualRegister(RC));
5177
5178     OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
5179     return;
5180   }
5181
5182   // This is a reference to a register class that doesn't directly correspond
5183   // to an LLVM register class.  Allocate NumRegs consecutive, available,
5184   // registers from the class.
5185   std::vector<unsigned> RegClassRegs
5186     = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
5187                                             OpInfo.ConstraintVT);
5188
5189   const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
5190   unsigned NumAllocated = 0;
5191   for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
5192     unsigned Reg = RegClassRegs[i];
5193     // See if this register is available.
5194     if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
5195         (isInReg  && InputRegs.count(Reg))) {    // Already used.
5196       // Make sure we find consecutive registers.
5197       NumAllocated = 0;
5198       continue;
5199     }
5200
5201     // Check to see if this register is allocatable (i.e. don't give out the
5202     // stack pointer).
5203     const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
5204     if (!RC) {        // Couldn't allocate this register.
5205       // Reset NumAllocated to make sure we return consecutive registers.
5206       NumAllocated = 0;
5207       continue;
5208     }
5209
5210     // Okay, this register is good, we can use it.
5211     ++NumAllocated;
5212
5213     // If we allocated enough consecutive registers, succeed.
5214     if (NumAllocated == NumRegs) {
5215       unsigned RegStart = (i-NumAllocated)+1;
5216       unsigned RegEnd   = i+1;
5217       // Mark all of the allocated registers used.
5218       for (unsigned i = RegStart; i != RegEnd; ++i)
5219         Regs.push_back(RegClassRegs[i]);
5220
5221       OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
5222                                          OpInfo.ConstraintVT);
5223       OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
5224       return;
5225     }
5226   }
5227
5228   // Otherwise, we couldn't allocate enough registers for this.
5229 }
5230
5231 /// visitInlineAsm - Handle a call to an InlineAsm object.
5232 ///
5233 void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
5234   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
5235
5236   /// ConstraintOperands - Information about all of the constraints.
5237   std::vector<SDISelAsmOperandInfo> ConstraintOperands;
5238
5239   std::set<unsigned> OutputRegs, InputRegs;
5240
5241   // Do a prepass over the constraints, canonicalizing them, and building up the
5242   // ConstraintOperands list.
5243   std::vector<InlineAsm::ConstraintInfo>
5244     ConstraintInfos = IA->ParseConstraints();
5245
5246   bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
5247
5248   SDValue Chain, Flag;
5249
5250   // We won't need to flush pending loads if this asm doesn't touch
5251   // memory and is nonvolatile.
5252   if (hasMemory || IA->hasSideEffects())
5253     Chain = getRoot();
5254   else
5255     Chain = DAG.getRoot();
5256
5257   unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
5258   unsigned ResNo = 0;   // ResNo - The result number of the next output.
5259   for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
5260     ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
5261     SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
5262
5263     EVT OpVT = MVT::Other;
5264
5265     // Compute the value type for each operand.
5266     switch (OpInfo.Type) {
5267     case InlineAsm::isOutput:
5268       // Indirect outputs just consume an argument.
5269       if (OpInfo.isIndirect) {
5270         OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
5271         break;
5272       }
5273
5274       // The return value of the call is this value.  As such, there is no
5275       // corresponding argument.
5276       assert(!CS.getType()->isVoidTy() &&
5277              "Bad inline asm!");
5278       if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
5279         OpVT = TLI.getValueType(STy->getElementType(ResNo));
5280       } else {
5281         assert(ResNo == 0 && "Asm only has one result!");
5282         OpVT = TLI.getValueType(CS.getType());
5283       }
5284       ++ResNo;
5285       break;
5286     case InlineAsm::isInput:
5287       OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
5288       break;
5289     case InlineAsm::isClobber:
5290       // Nothing to do.
5291       break;
5292     }
5293
5294     // If this is an input or an indirect output, process the call argument.
5295     // BasicBlocks are labels, currently appearing only in asm's.
5296     if (OpInfo.CallOperandVal) {
5297       // Strip bitcasts, if any.  This mostly comes up for functions.
5298       OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
5299
5300       if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
5301         OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
5302       } else {
5303         OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
5304       }
5305
5306       OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
5307     }
5308
5309     OpInfo.ConstraintVT = OpVT;
5310   }
5311
5312   // Second pass over the constraints: compute which constraint option to use
5313   // and assign registers to constraints that want a specific physreg.
5314   for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
5315     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5316
5317     // If this is an output operand with a matching input operand, look up the
5318     // matching input. If their types mismatch, e.g. one is an integer, the
5319     // other is floating point, or their sizes are different, flag it as an
5320     // error.
5321     if (OpInfo.hasMatchingInput()) {
5322       SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5323
5324       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5325         if ((OpInfo.ConstraintVT.isInteger() !=
5326              Input.ConstraintVT.isInteger()) ||
5327             (OpInfo.ConstraintVT.getSizeInBits() !=
5328              Input.ConstraintVT.getSizeInBits())) {
5329           report_fatal_error("Unsupported asm: input constraint"
5330                              " with a matching output constraint of"
5331                              " incompatible type!");
5332         }
5333         Input.ConstraintVT = OpInfo.ConstraintVT;
5334       }
5335     }
5336
5337     // Compute the constraint code and ConstraintType to use.
5338     TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
5339
5340     // If this is a memory input, and if the operand is not indirect, do what we
5341     // need to to provide an address for the memory input.
5342     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5343         !OpInfo.isIndirect) {
5344       assert(OpInfo.Type == InlineAsm::isInput &&
5345              "Can only indirectify direct input operands!");
5346
5347       // Memory operands really want the address of the value.  If we don't have
5348       // an indirect input, put it in the constpool if we can, otherwise spill
5349       // it to a stack slot.
5350
5351       // If the operand is a float, integer, or vector constant, spill to a
5352       // constant pool entry to get its address.
5353       const Value *OpVal = OpInfo.CallOperandVal;
5354       if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
5355           isa<ConstantVector>(OpVal)) {
5356         OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
5357                                                  TLI.getPointerTy());
5358       } else {
5359         // Otherwise, create a stack slot and emit a store to it before the
5360         // asm.
5361         const Type *Ty = OpVal->getType();
5362         uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
5363         unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
5364         MachineFunction &MF = DAG.getMachineFunction();
5365         int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
5366         SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
5367         Chain = DAG.getStore(Chain, getCurDebugLoc(),
5368                              OpInfo.CallOperand, StackSlot, NULL, 0,
5369                              false, false, 0);
5370         OpInfo.CallOperand = StackSlot;
5371       }
5372
5373       // There is no longer a Value* corresponding to this operand.
5374       OpInfo.CallOperandVal = 0;
5375
5376       // It is now an indirect operand.
5377       OpInfo.isIndirect = true;
5378     }
5379
5380     // If this constraint is for a specific register, allocate it before
5381     // anything else.
5382     if (OpInfo.ConstraintType == TargetLowering::C_Register)
5383       GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5384   }
5385
5386   ConstraintInfos.clear();
5387
5388   // Second pass - Loop over all of the operands, assigning virtual or physregs
5389   // to register class operands.
5390   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5391     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5392
5393     // C_Register operands have already been allocated, Other/Memory don't need
5394     // to be.
5395     if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
5396       GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5397   }
5398
5399   // AsmNodeOperands - The operands for the ISD::INLINEASM node.
5400   std::vector<SDValue> AsmNodeOperands;
5401   AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
5402   AsmNodeOperands.push_back(
5403           DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
5404                                       TLI.getPointerTy()));
5405
5406   // If we have a !srcloc metadata node associated with it, we want to attach
5407   // this to the ultimately generated inline asm machineinstr.  To do this, we
5408   // pass in the third operand as this (potentially null) inline asm MDNode.
5409   const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
5410   AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
5411
5412   // Remember the AlignStack bit as operand 3.
5413   AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
5414                                             MVT::i1));
5415
5416   // Loop over all of the inputs, copying the operand values into the
5417   // appropriate registers and processing the output regs.
5418   RegsForValue RetValRegs;
5419
5420   // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
5421   std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
5422
5423   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5424     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5425
5426     switch (OpInfo.Type) {
5427     case InlineAsm::isOutput: {
5428       if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
5429           OpInfo.ConstraintType != TargetLowering::C_Register) {
5430         // Memory output, or 'other' output (e.g. 'X' constraint).
5431         assert(OpInfo.isIndirect && "Memory output must be indirect operand");
5432
5433         // Add information to the INLINEASM node to know about this output.
5434         unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
5435         AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
5436                                                         TLI.getPointerTy()));
5437         AsmNodeOperands.push_back(OpInfo.CallOperand);
5438         break;
5439       }
5440
5441       // Otherwise, this is a register or register class output.
5442
5443       // Copy the output from the appropriate register.  Find a register that
5444       // we can use.
5445       if (OpInfo.AssignedRegs.Regs.empty())
5446         report_fatal_error("Couldn't allocate output reg for constraint '" +
5447                            Twine(OpInfo.ConstraintCode) + "'!");
5448
5449       // If this is an indirect operand, store through the pointer after the
5450       // asm.
5451       if (OpInfo.isIndirect) {
5452         IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
5453                                                       OpInfo.CallOperandVal));
5454       } else {
5455         // This is the result value of the call.
5456         assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
5457         // Concatenate this output onto the outputs list.
5458         RetValRegs.append(OpInfo.AssignedRegs);
5459       }
5460
5461       // Add information to the INLINEASM node to know that this register is
5462       // set.
5463       OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
5464                                            InlineAsm::Kind_RegDefEarlyClobber :
5465                                                InlineAsm::Kind_RegDef,
5466                                                false,
5467                                                0,
5468                                                DAG,
5469                                                AsmNodeOperands);
5470       break;
5471     }
5472     case InlineAsm::isInput: {
5473       SDValue InOperandVal = OpInfo.CallOperand;
5474
5475       if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
5476         // If this is required to match an output register we have already set,
5477         // just use its register.
5478         unsigned OperandNo = OpInfo.getMatchedOperand();
5479
5480         // Scan until we find the definition we already emitted of this operand.
5481         // When we find it, create a RegsForValue operand.
5482         unsigned CurOp = InlineAsm::Op_FirstOperand;
5483         for (; OperandNo; --OperandNo) {
5484           // Advance to the next operand.
5485           unsigned OpFlag =
5486             cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5487           assert((InlineAsm::isRegDefKind(OpFlag) ||
5488                   InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
5489                   InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
5490           CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
5491         }
5492
5493         unsigned OpFlag =
5494           cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5495         if (InlineAsm::isRegDefKind(OpFlag) ||
5496             InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
5497           // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
5498           if (OpInfo.isIndirect) {
5499             // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
5500             LLVMContext &Ctx = *DAG.getContext();
5501             Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
5502                           " don't know how to handle tied "
5503                           "indirect register inputs");
5504           }
5505
5506           RegsForValue MatchedRegs;
5507           MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
5508           EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
5509           MatchedRegs.RegVTs.push_back(RegVT);
5510           MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
5511           for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
5512                i != e; ++i)
5513             MatchedRegs.Regs.push_back
5514               (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
5515
5516           // Use the produced MatchedRegs object to
5517           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5518                                     Chain, &Flag);
5519           MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
5520                                            true, OpInfo.getMatchedOperand(),
5521                                            DAG, AsmNodeOperands);
5522           break;
5523         }
5524
5525         assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
5526         assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
5527                "Unexpected number of operands");
5528         // Add information to the INLINEASM node to know about this input.
5529         // See InlineAsm.h isUseOperandTiedToDef.
5530         OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
5531                                                     OpInfo.getMatchedOperand());
5532         AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
5533                                                         TLI.getPointerTy()));
5534         AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
5535         break;
5536       }
5537
5538       // Treat indirect 'X' constraint as memory.
5539       if (OpInfo.ConstraintType == TargetLowering::C_Other &&
5540           OpInfo.isIndirect)
5541         OpInfo.ConstraintType = TargetLowering::C_Memory;
5542
5543       if (OpInfo.ConstraintType == TargetLowering::C_Other) {
5544         std::vector<SDValue> Ops;
5545         TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
5546                                          Ops, DAG);
5547         if (Ops.empty())
5548           report_fatal_error("Invalid operand for inline asm constraint '" +
5549                              Twine(OpInfo.ConstraintCode) + "'!");
5550
5551         // Add information to the INLINEASM node to know about this input.
5552         unsigned ResOpType =
5553           InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
5554         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5555                                                         TLI.getPointerTy()));
5556         AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
5557         break;
5558       }
5559
5560       if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
5561         assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
5562         assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
5563                "Memory operands expect pointer values");
5564
5565         // Add information to the INLINEASM node to know about this input.
5566         unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
5567         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5568                                                         TLI.getPointerTy()));
5569         AsmNodeOperands.push_back(InOperandVal);
5570         break;
5571       }
5572
5573       assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
5574               OpInfo.ConstraintType == TargetLowering::C_Register) &&
5575              "Unknown constraint type!");
5576       assert(!OpInfo.isIndirect &&
5577              "Don't know how to handle indirect register inputs yet!");
5578
5579       // Copy the input into the appropriate registers.
5580       if (OpInfo.AssignedRegs.Regs.empty() ||
5581           !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
5582         report_fatal_error("Couldn't allocate input reg for constraint '" +
5583                            Twine(OpInfo.ConstraintCode) + "'!");
5584
5585       OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5586                                         Chain, &Flag);
5587
5588       OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
5589                                                DAG, AsmNodeOperands);
5590       break;
5591     }
5592     case InlineAsm::isClobber: {
5593       // Add the clobbered value to the operand list, so that the register
5594       // allocator is aware that the physreg got clobbered.
5595       if (!OpInfo.AssignedRegs.Regs.empty())
5596         OpInfo.AssignedRegs.AddInlineAsmOperands(
5597                                             InlineAsm::Kind_RegDefEarlyClobber,
5598                                                  false, 0, DAG,
5599                                                  AsmNodeOperands);
5600       break;
5601     }
5602     }
5603   }
5604
5605   // Finish up input operands.  Set the input chain and add the flag last.
5606   AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5607   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
5608
5609   Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
5610                       DAG.getVTList(MVT::Other, MVT::Flag),
5611                       &AsmNodeOperands[0], AsmNodeOperands.size());
5612   Flag = Chain.getValue(1);
5613
5614   // If this asm returns a register value, copy the result from that register
5615   // and set it as the value of the call.
5616   if (!RetValRegs.Regs.empty()) {
5617     SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
5618                                              Chain, &Flag);
5619
5620     // FIXME: Why don't we do this for inline asms with MRVs?
5621     if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
5622       EVT ResultType = TLI.getValueType(CS.getType());
5623
5624       // If any of the results of the inline asm is a vector, it may have the
5625       // wrong width/num elts.  This can happen for register classes that can
5626       // contain multiple different value types.  The preg or vreg allocated may
5627       // not have the same VT as was expected.  Convert it to the right type
5628       // with bit_convert.
5629       if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
5630         Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5631                           ResultType, Val);
5632
5633       } else if (ResultType != Val.getValueType() &&
5634                  ResultType.isInteger() && Val.getValueType().isInteger()) {
5635         // If a result value was tied to an input value, the computed result may
5636         // have a wider width than the expected result.  Extract the relevant
5637         // portion.
5638         Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
5639       }
5640
5641       assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
5642     }
5643
5644     setValue(CS.getInstruction(), Val);
5645     // Don't need to use this as a chain in this case.
5646     if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
5647       return;
5648   }
5649
5650   std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
5651
5652   // Process indirect outputs, first output all of the flagged copies out of
5653   // physregs.
5654   for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
5655     RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
5656     const Value *Ptr = IndirectStoresToEmit[i].second;
5657     SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
5658                                              Chain, &Flag);
5659     StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
5660   }
5661
5662   // Emit the non-flagged stores from the physregs.
5663   SmallVector<SDValue, 8> OutChains;
5664   for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
5665     SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
5666                                StoresToEmit[i].first,
5667                                getValue(StoresToEmit[i].second),
5668                                StoresToEmit[i].second, 0,
5669                                false, false, 0);
5670     OutChains.push_back(Val);
5671   }
5672
5673   if (!OutChains.empty())
5674     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
5675                         &OutChains[0], OutChains.size());
5676
5677   DAG.setRoot(Chain);
5678 }
5679
5680 void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
5681   DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
5682                           MVT::Other, getRoot(),
5683                           getValue(I.getArgOperand(0)),
5684                           DAG.getSrcValue(I.getArgOperand(0))));
5685 }
5686
5687 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
5688   const TargetData &TD = *TLI.getTargetData();
5689   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
5690                            getRoot(), getValue(I.getOperand(0)),
5691                            DAG.getSrcValue(I.getOperand(0)),
5692                            TD.getABITypeAlignment(I.getType()));
5693   setValue(&I, V);
5694   DAG.setRoot(V.getValue(1));
5695 }
5696
5697 void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
5698   DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
5699                           MVT::Other, getRoot(),
5700                           getValue(I.getArgOperand(0)),
5701                           DAG.getSrcValue(I.getArgOperand(0))));
5702 }
5703
5704 void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
5705   DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
5706                           MVT::Other, getRoot(),
5707                           getValue(I.getArgOperand(0)),
5708                           getValue(I.getArgOperand(1)),
5709                           DAG.getSrcValue(I.getArgOperand(0)),
5710                           DAG.getSrcValue(I.getArgOperand(1))));
5711 }
5712
5713 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
5714 /// implementation, which just calls LowerCall.
5715 /// FIXME: When all targets are
5716 /// migrated to using LowerCall, this hook should be integrated into SDISel.
5717 std::pair<SDValue, SDValue>
5718 TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
5719                             bool RetSExt, bool RetZExt, bool isVarArg,
5720                             bool isInreg, unsigned NumFixedArgs,
5721                             CallingConv::ID CallConv, bool isTailCall,
5722                             bool isReturnValueUsed,
5723                             SDValue Callee,
5724                             ArgListTy &Args, SelectionDAG &DAG,
5725                             DebugLoc dl) const {
5726   // Handle all of the outgoing arguments.
5727   SmallVector<ISD::OutputArg, 32> Outs;
5728   SmallVector<SDValue, 32> OutVals;
5729   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
5730     SmallVector<EVT, 4> ValueVTs;
5731     ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
5732     for (unsigned Value = 0, NumValues = ValueVTs.size();
5733          Value != NumValues; ++Value) {
5734       EVT VT = ValueVTs[Value];
5735       const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
5736       SDValue Op = SDValue(Args[i].Node.getNode(),
5737                            Args[i].Node.getResNo() + Value);
5738       ISD::ArgFlagsTy Flags;
5739       unsigned OriginalAlignment =
5740         getTargetData()->getABITypeAlignment(ArgTy);
5741
5742       if (Args[i].isZExt)
5743         Flags.setZExt();
5744       if (Args[i].isSExt)
5745         Flags.setSExt();
5746       if (Args[i].isInReg)
5747         Flags.setInReg();
5748       if (Args[i].isSRet)
5749         Flags.setSRet();
5750       if (Args[i].isByVal) {
5751         Flags.setByVal();
5752         const PointerType *Ty = cast<PointerType>(Args[i].Ty);
5753         const Type *ElementTy = Ty->getElementType();
5754         unsigned FrameAlign = getByValTypeAlignment(ElementTy);
5755         unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
5756         // For ByVal, alignment should come from FE.  BE will guess if this
5757         // info is not there but there are cases it cannot get right.
5758         if (Args[i].Alignment)
5759           FrameAlign = Args[i].Alignment;
5760         Flags.setByValAlign(FrameAlign);
5761         Flags.setByValSize(FrameSize);
5762       }
5763       if (Args[i].isNest)
5764         Flags.setNest();
5765       Flags.setOrigAlign(OriginalAlignment);
5766
5767       EVT PartVT = getRegisterType(RetTy->getContext(), VT);
5768       unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
5769       SmallVector<SDValue, 4> Parts(NumParts);
5770       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
5771
5772       if (Args[i].isSExt)
5773         ExtendKind = ISD::SIGN_EXTEND;
5774       else if (Args[i].isZExt)
5775         ExtendKind = ISD::ZERO_EXTEND;
5776
5777       getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
5778                      PartVT, ExtendKind);
5779
5780       for (unsigned j = 0; j != NumParts; ++j) {
5781         // if it isn't first piece, alignment must be 1
5782         ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
5783                                i < NumFixedArgs);
5784         if (NumParts > 1 && j == 0)
5785           MyFlags.Flags.setSplit();
5786         else if (j != 0)
5787           MyFlags.Flags.setOrigAlign(1);
5788
5789         Outs.push_back(MyFlags);
5790         OutVals.push_back(Parts[j]);
5791       }
5792     }
5793   }
5794
5795   // Handle the incoming return values from the call.
5796   SmallVector<ISD::InputArg, 32> Ins;
5797   SmallVector<EVT, 4> RetTys;
5798   ComputeValueVTs(*this, RetTy, RetTys);
5799   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
5800     EVT VT = RetTys[I];
5801     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
5802     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
5803     for (unsigned i = 0; i != NumRegs; ++i) {
5804       ISD::InputArg MyFlags;
5805       MyFlags.VT = RegisterVT;
5806       MyFlags.Used = isReturnValueUsed;
5807       if (RetSExt)
5808         MyFlags.Flags.setSExt();
5809       if (RetZExt)
5810         MyFlags.Flags.setZExt();
5811       if (isInreg)
5812         MyFlags.Flags.setInReg();
5813       Ins.push_back(MyFlags);
5814     }
5815   }
5816
5817   SmallVector<SDValue, 4> InVals;
5818   Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
5819                     Outs, OutVals, Ins, dl, DAG, InVals);
5820
5821   // Verify that the target's LowerCall behaved as expected.
5822   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
5823          "LowerCall didn't return a valid chain!");
5824   assert((!isTailCall || InVals.empty()) &&
5825          "LowerCall emitted a return value for a tail call!");
5826   assert((isTailCall || InVals.size() == Ins.size()) &&
5827          "LowerCall didn't emit the correct number of values!");
5828
5829   // For a tail call, the return value is merely live-out and there aren't
5830   // any nodes in the DAG representing it. Return a special value to
5831   // indicate that a tail call has been emitted and no more Instructions
5832   // should be processed in the current block.
5833   if (isTailCall) {
5834     DAG.setRoot(Chain);
5835     return std::make_pair(SDValue(), SDValue());
5836   }
5837
5838   DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5839           assert(InVals[i].getNode() &&
5840                  "LowerCall emitted a null value!");
5841           assert(Ins[i].VT == InVals[i].getValueType() &&
5842                  "LowerCall emitted a value with the wrong type!");
5843         });
5844
5845   // Collect the legal value parts into potentially illegal values
5846   // that correspond to the original function's return values.
5847   ISD::NodeType AssertOp = ISD::DELETED_NODE;
5848   if (RetSExt)
5849     AssertOp = ISD::AssertSext;
5850   else if (RetZExt)
5851     AssertOp = ISD::AssertZext;
5852   SmallVector<SDValue, 4> ReturnValues;
5853   unsigned CurReg = 0;
5854   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
5855     EVT VT = RetTys[I];
5856     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
5857     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
5858
5859     ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
5860                                             NumRegs, RegisterVT, VT,
5861                                             AssertOp));
5862     CurReg += NumRegs;
5863   }
5864
5865   // For a function returning void, there is no return value. We can't create
5866   // such a node, so we just return a null return value in that case. In
5867   // that case, nothing will actualy look at the value.
5868   if (ReturnValues.empty())
5869     return std::make_pair(SDValue(), Chain);
5870
5871   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
5872                             DAG.getVTList(&RetTys[0], RetTys.size()),
5873                             &ReturnValues[0], ReturnValues.size());
5874   return std::make_pair(Res, Chain);
5875 }
5876
5877 void TargetLowering::LowerOperationWrapper(SDNode *N,
5878                                            SmallVectorImpl<SDValue> &Results,
5879                                            SelectionDAG &DAG) const {
5880   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
5881   if (Res.getNode())
5882     Results.push_back(Res);
5883 }
5884
5885 SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5886   llvm_unreachable("LowerOperation not implemented for this target!");
5887   return SDValue();
5888 }
5889
5890 void
5891 SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
5892   SDValue Op = getNonRegisterValue(V);
5893   assert((Op.getOpcode() != ISD::CopyFromReg ||
5894           cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
5895          "Copy from a reg to the same reg!");
5896   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
5897
5898   RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
5899   SDValue Chain = DAG.getEntryNode();
5900   RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
5901   PendingExports.push_back(Chain);
5902 }
5903
5904 #include "llvm/CodeGen/SelectionDAGISel.h"
5905
5906 void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
5907   // If this is the entry block, emit arguments.
5908   const Function &F = *LLVMBB->getParent();
5909   SelectionDAG &DAG = SDB->DAG;
5910   DebugLoc dl = SDB->getCurDebugLoc();
5911   const TargetData *TD = TLI.getTargetData();
5912   SmallVector<ISD::InputArg, 16> Ins;
5913
5914   // Check whether the function can return without sret-demotion.
5915   SmallVector<ISD::OutputArg, 4> Outs;
5916   GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
5917                 Outs, TLI);
5918
5919   if (!FuncInfo->CanLowerReturn) {
5920     // Put in an sret pointer parameter before all the other parameters.
5921     SmallVector<EVT, 1> ValueVTs;
5922     ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
5923
5924     // NOTE: Assuming that a pointer will never break down to more than one VT
5925     // or one register.
5926     ISD::ArgFlagsTy Flags;
5927     Flags.setSRet();
5928     EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
5929     ISD::InputArg RetArg(Flags, RegisterVT, true);
5930     Ins.push_back(RetArg);
5931   }
5932
5933   // Set up the incoming argument description vector.
5934   unsigned Idx = 1;
5935   for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
5936        I != E; ++I, ++Idx) {
5937     SmallVector<EVT, 4> ValueVTs;
5938     ComputeValueVTs(TLI, I->getType(), ValueVTs);
5939     bool isArgValueUsed = !I->use_empty();
5940     for (unsigned Value = 0, NumValues = ValueVTs.size();
5941          Value != NumValues; ++Value) {
5942       EVT VT = ValueVTs[Value];
5943       const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5944       ISD::ArgFlagsTy Flags;
5945       unsigned OriginalAlignment =
5946         TD->getABITypeAlignment(ArgTy);
5947
5948       if (F.paramHasAttr(Idx, Attribute::ZExt))
5949         Flags.setZExt();
5950       if (F.paramHasAttr(Idx, Attribute::SExt))
5951         Flags.setSExt();
5952       if (F.paramHasAttr(Idx, Attribute::InReg))
5953         Flags.setInReg();
5954       if (F.paramHasAttr(Idx, Attribute::StructRet))
5955         Flags.setSRet();
5956       if (F.paramHasAttr(Idx, Attribute::ByVal)) {
5957         Flags.setByVal();
5958         const PointerType *Ty = cast<PointerType>(I->getType());
5959         const Type *ElementTy = Ty->getElementType();
5960         unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
5961         unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
5962         // For ByVal, alignment should be passed from FE.  BE will guess if
5963         // this info is not there but there are cases it cannot get right.
5964         if (F.getParamAlignment(Idx))
5965           FrameAlign = F.getParamAlignment(Idx);
5966         Flags.setByValAlign(FrameAlign);
5967         Flags.setByValSize(FrameSize);
5968       }
5969       if (F.paramHasAttr(Idx, Attribute::Nest))
5970         Flags.setNest();
5971       Flags.setOrigAlign(OriginalAlignment);
5972
5973       EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
5974       unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
5975       for (unsigned i = 0; i != NumRegs; ++i) {
5976         ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
5977         if (NumRegs > 1 && i == 0)
5978           MyFlags.Flags.setSplit();
5979         // if it isn't first piece, alignment must be 1
5980         else if (i > 0)
5981           MyFlags.Flags.setOrigAlign(1);
5982         Ins.push_back(MyFlags);
5983       }
5984     }
5985   }
5986
5987   // Call the target to set up the argument values.
5988   SmallVector<SDValue, 8> InVals;
5989   SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
5990                                              F.isVarArg(), Ins,
5991                                              dl, DAG, InVals);
5992
5993   // Verify that the target's LowerFormalArguments behaved as expected.
5994   assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
5995          "LowerFormalArguments didn't return a valid chain!");
5996   assert(InVals.size() == Ins.size() &&
5997          "LowerFormalArguments didn't emit the correct number of values!");
5998   DEBUG({
5999       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6000         assert(InVals[i].getNode() &&
6001                "LowerFormalArguments emitted a null value!");
6002         assert(Ins[i].VT == InVals[i].getValueType() &&
6003                "LowerFormalArguments emitted a value with the wrong type!");
6004       }
6005     });
6006
6007   // Update the DAG with the new chain value resulting from argument lowering.
6008   DAG.setRoot(NewRoot);
6009
6010   // Set up the argument values.
6011   unsigned i = 0;
6012   Idx = 1;
6013   if (!FuncInfo->CanLowerReturn) {
6014     // Create a virtual register for the sret pointer, and put in a copy
6015     // from the sret argument into it.
6016     SmallVector<EVT, 1> ValueVTs;
6017     ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
6018     EVT VT = ValueVTs[0];
6019     EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6020     ISD::NodeType AssertOp = ISD::DELETED_NODE;
6021     SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
6022                                         RegVT, VT, AssertOp);
6023
6024     MachineFunction& MF = SDB->DAG.getMachineFunction();
6025     MachineRegisterInfo& RegInfo = MF.getRegInfo();
6026     unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
6027     FuncInfo->DemoteRegister = SRetReg;
6028     NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
6029                                     SRetReg, ArgValue);
6030     DAG.setRoot(NewRoot);
6031
6032     // i indexes lowered arguments.  Bump it past the hidden sret argument.
6033     // Idx indexes LLVM arguments.  Don't touch it.
6034     ++i;
6035   }
6036
6037   for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
6038       ++I, ++Idx) {
6039     SmallVector<SDValue, 4> ArgValues;
6040     SmallVector<EVT, 4> ValueVTs;
6041     ComputeValueVTs(TLI, I->getType(), ValueVTs);
6042     unsigned NumValues = ValueVTs.size();
6043
6044     // If this argument is unused then remember its value. It is used to generate
6045     // debugging information.
6046     if (I->use_empty() && NumValues)
6047       SDB->setUnusedArgValue(I, InVals[i]);
6048
6049     for (unsigned Value = 0; Value != NumValues; ++Value) {
6050       EVT VT = ValueVTs[Value];
6051       EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6052       unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6053
6054       if (!I->use_empty()) {
6055         ISD::NodeType AssertOp = ISD::DELETED_NODE;
6056         if (F.paramHasAttr(Idx, Attribute::SExt))
6057           AssertOp = ISD::AssertSext;
6058         else if (F.paramHasAttr(Idx, Attribute::ZExt))
6059           AssertOp = ISD::AssertZext;
6060
6061         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
6062                                              NumParts, PartVT, VT,
6063                                              AssertOp));
6064       }
6065
6066       i += NumParts;
6067     }
6068
6069     if (!I->use_empty()) {
6070       SDValue Res;
6071       if (!ArgValues.empty())
6072         Res = DAG.getMergeValues(&ArgValues[0], NumValues,
6073                                  SDB->getCurDebugLoc());
6074       SDB->setValue(I, Res);
6075
6076       // If this argument is live outside of the entry block, insert a copy from
6077       // whereever we got it to the vreg that other BB's will reference it as.
6078       SDB->CopyToExportRegsIfNeeded(I);
6079     }
6080   }
6081
6082   assert(i == InVals.size() && "Argument register count mismatch!");
6083
6084   // Finally, if the target has anything special to do, allow it to do so.
6085   // FIXME: this should insert code into the DAG!
6086   EmitFunctionEntryCode();
6087 }
6088
6089 /// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
6090 /// ensure constants are generated when needed.  Remember the virtual registers
6091 /// that need to be added to the Machine PHI nodes as input.  We cannot just
6092 /// directly add them, because expansion might result in multiple MBB's for one
6093 /// BB.  As such, the start of the BB might correspond to a different MBB than
6094 /// the end.
6095 ///
6096 void
6097 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
6098   const TerminatorInst *TI = LLVMBB->getTerminator();
6099
6100   SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
6101
6102   // Check successor nodes' PHI nodes that expect a constant to be available
6103   // from this block.
6104   for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
6105     const BasicBlock *SuccBB = TI->getSuccessor(succ);
6106     if (!isa<PHINode>(SuccBB->begin())) continue;
6107     MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
6108
6109     // If this terminator has multiple identical successors (common for
6110     // switches), only handle each succ once.
6111     if (!SuccsHandled.insert(SuccMBB)) continue;
6112
6113     MachineBasicBlock::iterator MBBI = SuccMBB->begin();
6114
6115     // At this point we know that there is a 1-1 correspondence between LLVM PHI
6116     // nodes and Machine PHI nodes, but the incoming operands have not been
6117     // emitted yet.
6118     for (BasicBlock::const_iterator I = SuccBB->begin();
6119          const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
6120       // Ignore dead phi's.
6121       if (PN->use_empty()) continue;
6122
6123       unsigned Reg;
6124       const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
6125
6126       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
6127         unsigned &RegOut = ConstantsOut[C];
6128         if (RegOut == 0) {
6129           RegOut = FuncInfo.CreateRegs(C->getType());
6130           CopyValueToVirtualRegister(C, RegOut);
6131         }
6132         Reg = RegOut;
6133       } else {
6134         DenseMap<const Value *, unsigned>::iterator I =
6135           FuncInfo.ValueMap.find(PHIOp);
6136         if (I != FuncInfo.ValueMap.end())
6137           Reg = I->second;
6138         else {
6139           assert(isa<AllocaInst>(PHIOp) &&
6140                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
6141                  "Didn't codegen value into a register!??");
6142           Reg = FuncInfo.CreateRegs(PHIOp->getType());
6143           CopyValueToVirtualRegister(PHIOp, Reg);
6144         }
6145       }
6146
6147       // Remember that this register needs to added to the machine PHI node as
6148       // the input for this MBB.
6149       SmallVector<EVT, 4> ValueVTs;
6150       ComputeValueVTs(TLI, PN->getType(), ValueVTs);
6151       for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
6152         EVT VT = ValueVTs[vti];
6153         unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
6154         for (unsigned i = 0, e = NumRegisters; i != e; ++i)
6155           FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
6156         Reg += NumRegisters;
6157       }
6158     }
6159   }
6160   ConstantsOut.clear();
6161 }