lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

   1 //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This implements routines for translating from LLVM IR into SelectionDAG IR.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "isel"
  15 #include "SelectionDAGBuilder.h"
  16 #include "FunctionLoweringInfo.h"
  17 #include "llvm/ADT/BitVector.h"
  18 #include "llvm/ADT/SmallSet.h"
  19 #include "llvm/Analysis/AliasAnalysis.h"
  20 #include "llvm/Constants.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/DerivedTypes.h"
  23 #include "llvm/Function.h"
  24 #include "llvm/GlobalVariable.h"
  25 #include "llvm/InlineAsm.h"
  26 #include "llvm/Instructions.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/IntrinsicInst.h"
  29 #include "llvm/LLVMContext.h"
  30 #include "llvm/Module.h"
  31 #include "llvm/CodeGen/FastISel.h"
  32 #include "llvm/CodeGen/GCStrategy.h"
  33 #include "llvm/CodeGen/GCMetadata.h"
  34 #include "llvm/CodeGen/MachineFunction.h"
  35 #include "llvm/CodeGen/MachineFrameInfo.h"
  36 #include "llvm/CodeGen/MachineInstrBuilder.h"
  37 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  38 #include "llvm/CodeGen/MachineModuleInfo.h"
  39 #include "llvm/CodeGen/MachineRegisterInfo.h"
  40 #include "llvm/CodeGen/PseudoSourceValue.h"
  41 #include "llvm/CodeGen/SelectionDAG.h"
  42 #include "llvm/CodeGen/DwarfWriter.h"
  43 #include "llvm/Analysis/DebugInfo.h"
  44 #include "llvm/Target/TargetRegisterInfo.h"
  45 #include "llvm/Target/TargetData.h"
  46 #include "llvm/Target/TargetFrameInfo.h"
  47 #include "llvm/Target/TargetInstrInfo.h"
  48 #include "llvm/Target/TargetIntrinsicInfo.h"
  49 #include "llvm/Target/TargetLowering.h"
  50 #include "llvm/Target/TargetOptions.h"
  51 #include "llvm/Support/Compiler.h"
  52 #include "llvm/Support/CommandLine.h"
  53 #include "llvm/Support/Debug.h"
  54 #include "llvm/Support/ErrorHandling.h"
  55 #include "llvm/Support/MathExtras.h"
  56 #include "llvm/Support/raw_ostream.h"
  57 #include <algorithm>
  58 using namespace llvm;
  59
   60 /// LimitFloatPrecision - Precision setting for the low-precision inline
   61 /// sequences generated for some float libcalls (6, 8 or 12 bits).
   62 static unsigned LimitFloatPrecision;
   63
      /// LimitFPPrecision - The limit-float-precision command line option.  Its
      /// value is stored in LimitFloatPrecision (see cl::location below) and is
      /// initialized to 0.
   64 static cl::opt<unsigned, true>
   65 LimitFPPrecision("limit-float-precision",
   66                  cl::desc("Generate low-precision inline sequences "
   67                           "for some float libcalls"),
   68                  cl::location(LimitFloatPrecision),
   69                  cl::init(0));
  70
  71 namespace {
  72   /// RegsForValue - This struct represents the registers (physical or virtual)
  73   /// that a particular set of values is assigned, and the type information about
  74   /// the value. The most common situation is to represent one value at a time,
  75   /// but struct or array values are handled element-wise as multiple values.
  76   /// The splitting of aggregates is performed recursively, so that we never
  77   /// have aggregate-typed registers. The values at this point do not necessarily
  78   /// have legal types, so each value may require one or more registers of some
  79   /// legal type.
  80   ///
  81   struct RegsForValue {
  82     /// TLI - The TargetLowering object.
  83     ///
  84     const TargetLowering *TLI;
  85
  86     /// ValueVTs - The value types of the values, which may not be legal, and
  87     /// may need be promoted or synthesized from one or more registers.
  88     ///
  89     SmallVector<EVT, 4> ValueVTs;
  90
  91     /// RegVTs - The value types of the registers. This is the same size as
  92     /// ValueVTs and it records, for each value, what the type of the assigned
  93     /// register or registers are. (Individual values are never synthesized
  94     /// from more than one type of register.)
  95     ///
  96     /// With virtual registers, the contents of RegVTs is redundant with TLI's
  97     /// getRegisterType member function, however when with physical registers
  98     /// it is necessary to have a separate record of the types.
  99     ///
 100     SmallVector<EVT, 4> RegVTs;
 101
 102     /// Regs - This list holds the registers assigned to the values.
 103     /// Each legal or promoted value requires one register, and each
 104     /// expanded value requires multiple registers.
 105     ///
 106     SmallVector<unsigned, 4> Regs;
 107
 108     RegsForValue() : TLI(0) {}
 109
 110     RegsForValue(const TargetLowering &tli,
 111                  const SmallVector<unsigned, 4> &regs,
 112                  EVT regvt, EVT valuevt)
 113       : TLI(&tli),  ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
 114     RegsForValue(const TargetLowering &tli,
 115                  const SmallVector<unsigned, 4> &regs,
 116                  const SmallVector<EVT, 4> &regvts,
 117                  const SmallVector<EVT, 4> &valuevts)
 118       : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
 119     RegsForValue(LLVMContext &Context, const TargetLowering &tli,
 120                  unsigned Reg, const Type *Ty) : TLI(&tli) {
 121       ComputeValueVTs(tli, Ty, ValueVTs);
 122
 123       for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
 124         EVT ValueVT = ValueVTs[Value];
 125         unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
 126         EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
 127         for (unsigned i = 0; i != NumRegs; ++i)
 128           Regs.push_back(Reg + i);
 129         RegVTs.push_back(RegisterVT);
 130         Reg += NumRegs;
 131       }
 132     }
 133
 134     /// append - Add the specified values to this one.
 135     void append(const RegsForValue &RHS) {
 136       TLI = RHS.TLI;
 137       ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
 138       RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
 139       Regs.append(RHS.Regs.begin(), RHS.Regs.end());
 140     }
 141
 142
 143     /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
 144     /// this value and returns the result as a ValueVTs value.  This uses
 145     /// Chain/Flag as the input and updates them for the output Chain/Flag.
 146     /// If the Flag pointer is NULL, no flag is used.
 147     SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
 148                             SDValue &Chain, SDValue *Flag) const;
 149
 150     /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
 151     /// specified value into the registers specified by this object.  This uses
 152     /// Chain/Flag as the input and updates them for the output Chain/Flag.
 153     /// If the Flag pointer is NULL, no flag is used.
 154     void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
 155                        unsigned Order, SDValue &Chain, SDValue *Flag) const;
 156
 157     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
 158     /// operand list.  This adds the code marker, matching input operand index
 159     /// (if applicable), and includes the number of values added into it.
 160     void AddInlineAsmOperands(unsigned Code,
 161                               bool HasMatching, unsigned MatchingIdx,
 162                               SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
 163   };
 164 }
 165
  166 /// getCopyFromParts - Create a value that contains the specified legal parts
  167 /// combined into the value they represent.  If the parts combine to a type
  168 /// larger than ValueVT then AssertOp can be used to specify whether the extra
  169 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
  170 /// (ISD::AssertSext).
      ///
      /// Parts points at NumParts values (at least one is required), PartVT is the
      /// type given for the parts and ValueVT is the type of the returned value.
      /// When there is more than one part, the parts are first assembled into a
      /// single value: scalar integers with BUILD_PAIR (plus shift/or for a
      /// trailing non-power-of-2 group), vectors with BUILD_VECTOR or
      /// CONCAT_VECTORS, and floating point values either from two f64 halves
      /// (ppcf128) or through an integer of the same size.  The single value is
      /// then converted to ValueVT by a truncate, extend, FP round/extend or bit
      /// conversion, as appropriate.
  171 static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
  172                                 const SDValue *Parts,
  173                                 unsigned NumParts, EVT PartVT, EVT ValueVT,
  174                                 ISD::NodeType AssertOp = ISD::DELETED_NODE) {
  175   assert(NumParts > 0 && "No parts to assemble!");
  176   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  177   SDValue Val = Parts[0];
  178
  179   if (NumParts > 1) {
  180     // Assemble the value from multiple parts.
  181     if (!ValueVT.isVector() && ValueVT.isInteger()) {
  182       unsigned PartBits = PartVT.getSizeInBits();
  183       unsigned ValueBits = ValueVT.getSizeInBits();
  184
  185       // Assemble the power of 2 part.  RoundParts is NumParts itself when
            // NumParts is a power of 2, and 1 << Log2_32(NumParts) otherwise.
  186       unsigned RoundParts = NumParts & (NumParts - 1) ?
  187         1 << Log2_32(NumParts) : NumParts;
  188       unsigned RoundBits = PartBits * RoundParts;
  189       EVT RoundVT = RoundBits == ValueBits ?
  190         ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
  191       SDValue Lo, Hi;
  192
  193       EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
  194
            // Build each half: recursively when a half is itself made of several
            // parts, otherwise by bit-converting one part to the half type.
  195       if (RoundParts > 2) {
  196         Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
  197         Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2,
  198                               PartVT, HalfVT);
  199       } else {
  200         Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
  201         Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
  202       }
  203       if (TLI.isBigEndian())
  204         std::swap(Lo, Hi);
  205       Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
  206
  207       if (RoundParts < NumParts) {
  208         // Assemble the trailing non-power-of-2 part.
  209         unsigned OddParts = NumParts - RoundParts;
  210         EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
  211         Hi = getCopyFromParts(DAG, dl,
  212                               Parts+RoundParts, OddParts, PartVT, OddVT);
  213
  214         // Combine the round and odd parts: widen both to the total width,
              // shift Hi left by the width of Lo and OR the two together.
  215         Lo = Val;
  216         if (TLI.isBigEndian())
  217           std::swap(Lo, Hi);
  218         EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
  219         Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
  220         Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
  221                          DAG.getConstant(Lo.getValueType().getSizeInBits(),
  222                                          TLI.getPointerTy()));
  223         Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
  224         Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
  225       }
  226     } else if (ValueVT.isVector()) {
  227       // Handle a multi-element vector.  The target reports how ValueVT breaks
            // down into NumIntermediates values of IntermediateVT, held in NumRegs
            // registers of RegisterVT.
  228       EVT IntermediateVT, RegisterVT;
  229       unsigned NumIntermediates;
  230       unsigned NumRegs =
  231         TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
  232                                    NumIntermediates, RegisterVT);
  233       assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  234       NumParts = NumRegs; // Silence a compiler warning.
  235       assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
  236       assert(RegisterVT == Parts[0].getValueType() &&
  237              "Part type doesn't match part!");
  238
  239       // Assemble the parts into intermediate operands.
  240       SmallVector<SDValue, 8> Ops(NumIntermediates);
  241       if (NumIntermediates == NumParts) {
  242         // If the register was not expanded, truncate or copy the value,
  243         // as appropriate.
  244         for (unsigned i = 0; i != NumParts; ++i)
  245           Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
  246                                     PartVT, IntermediateVT);
  247       } else if (NumParts > 0) {
  248         // If the intermediate type was expanded, build the intermediate operands
  249         // from the parts.  Each operand is made of Factor consecutive parts.
  250         assert(NumParts % NumIntermediates == 0 &&
  251                "Must expand into a divisible number of parts!");
  252         unsigned Factor = NumParts / NumIntermediates;
  253         for (unsigned i = 0; i != NumIntermediates; ++i)
  254           Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
  255                                     PartVT, IntermediateVT);
  256       }
  257
  258       // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
  259       // operands (CONCAT_VECTORS when the operands are vectors themselves).
  260       Val = DAG.getNode(IntermediateVT.isVector() ?
  261                         ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
  262                         ValueVT, &Ops[0], NumIntermediates);
  263     } else if (PartVT.isFloatingPoint()) {
  264       // FP split into multiple FP parts (for ppcf128)
  265       assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
  266              "Unexpected split");
  267       SDValue Lo, Hi;
  268       Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
  269       Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
  270       if (TLI.isBigEndian())
  271         std::swap(Lo, Hi);
  272       Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
  273     } else {
  274       // FP split into integer parts (soft fp).  Assemble an integer of the
            // same size here; it is converted to ValueVT further below.
  275       assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
  276              !PartVT.isVector() && "Unexpected split");
  277       EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
  278       Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
  279     }
  280   }
  281
  282   // There is now one part, held in Val.  Correct it to match ValueVT.
  283   PartVT = Val.getValueType();
  284
  285   if (PartVT == ValueVT)
  286     return Val;
  287
        // A vector part is only ever bit-converted to another vector type.
  288   if (PartVT.isVector()) {
  289     assert(ValueVT.isVector() && "Unknown vector conversion!");
  290     return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
  291   }
  292
        // A scalar part can only become a one-element vector of the same type.
  293   if (ValueVT.isVector()) {
  294     assert(ValueVT.getVectorElementType() == PartVT &&
  295            ValueVT.getVectorNumElements() == 1 &&
  296            "Only trivial scalar-to-vector conversions should get here!");
  297     return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
  298   }
  299
  300   if (PartVT.isInteger() &&
  301       ValueVT.isInteger()) {
  302     if (ValueVT.bitsLT(PartVT)) {
  303       // For a truncate, see if we have any information to
  304       // indicate whether the truncated bits will always be
  305       // zero or sign-extension.
  306       if (AssertOp != ISD::DELETED_NODE)
  307         Val = DAG.getNode(AssertOp, dl, PartVT, Val,
  308                           DAG.getValueType(ValueVT));
  309       return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
  310     } else {
  311       return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
  312     }
  313   }
  314
  315   if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
  316     if (ValueVT.bitsLT(Val.getValueType()))
  317       // FP_ROUND's are always exact here.
  318       return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
  319                          DAG.getIntPtrConstant(1));
  320     return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
  321   }
  322
        // Types of different kinds but the same size are bit-converted.
  323   if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
  324     return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
  325
  326   llvm_unreachable("Unknown mismatch!");
  327   return SDValue();
  328 }
 329
  330 /// getCopyToParts - Create a series of nodes that contain the specified value
  331 /// split into legal parts.  If the parts contain more bits than Val, then, for
  332 /// integers, ExtendKind can be used to specify how to generate the extra bits.
      ///
      /// The results are written to Parts[0] through Parts[NumParts-1]; nothing is
      /// written when NumParts is zero.  PartVT is the type of each part and is
      /// asserted to be legal.  A scalar Val is first promoted, truncated or
      /// bit-converted so that it covers exactly NumParts * PartVT bits and is
      /// then bisected repeatedly; on big-endian targets the resulting parts are
      /// stored in reverse order.  A vector Val is split according to the vector
      /// type breakdown reported by the target.
  333 static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
  334                            SDValue *Parts, unsigned NumParts, EVT PartVT,
  335                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
  336   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  337   EVT PtrVT = TLI.getPointerTy();
  338   EVT ValueVT = Val.getValueType();
  339   unsigned PartBits = PartVT.getSizeInBits();
  340   unsigned OrigNumParts = NumParts; // NumParts is reduced below for odd counts.
  341   assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
  342
  343   if (!NumParts)
  344     return;
  345
  346   if (!ValueVT.isVector()) {
  347     if (PartVT == ValueVT) {
  348       assert(NumParts == 1 && "No-op copy with multiple parts!");
  349       Parts[0] = Val;
  350       return;
  351     }
  352
  353     if (NumParts * PartBits > ValueVT.getSizeInBits()) {
  354       // If the parts cover more bits than the value has, promote the value.
  355       if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
  356         assert(NumParts == 1 && "Do not know what to promote to!");
  357         Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
  358       } else if (PartVT.isInteger() && ValueVT.isInteger()) {
  359         ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
  360         Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
  361       } else {
  362         llvm_unreachable("Unknown mismatch!");
  363       }
  364     } else if (PartBits == ValueVT.getSizeInBits()) {
  365       // Different types of the same size.
  366       assert(NumParts == 1 && PartVT != ValueVT);
  367       Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
  368     } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
  369       // If the parts cover less bits than value has, truncate the value.
  370       if (PartVT.isInteger() && ValueVT.isInteger()) {
  371         ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
  372         Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
  373       } else {
  374         llvm_unreachable("Unknown mismatch!");
  375       }
  376     }
  377
  378     // The value may have changed - recompute ValueVT.
  379     ValueVT = Val.getValueType();
  380     assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
  381            "Failed to tile the value with PartVT!");
  382
  383     if (NumParts == 1) {
  384       assert(PartVT == ValueVT && "Type conversion failed!");
  385       Parts[0] = Val;
  386       return;
  387     }
  388
  389     // Expand the value into multiple parts.
  390     if (NumParts & (NumParts - 1)) {
  391       // The number of parts is not a power of 2.  Split off and copy the tail.
  392       assert(PartVT.isInteger() && ValueVT.isInteger() &&
  393              "Do not know what to expand to!");
  394       unsigned RoundParts = 1 << Log2_32(NumParts);
  395       unsigned RoundBits = RoundParts * PartBits;
  396       unsigned OddParts = NumParts - RoundParts;
            // Shift the bits above the leading RoundBits down and copy them into
            // the parts that follow the first RoundParts parts.
  397       SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
  398                                    DAG.getConstant(RoundBits,
  399                                                    TLI.getPointerTy()));
  400       getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT);
  401       if (TLI.isBigEndian())
  402         // The odd parts were reversed by getCopyToParts - unreverse them.
  403         std::reverse(Parts + RoundParts, Parts + NumParts);
            // Continue with only the leading RoundParts parts of the value.
  404       NumParts = RoundParts;
  405       ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
  406       Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
  407     }
  408
  409     // The number of parts is a power of 2.  Repeatedly bisect the value using
  410     // EXTRACT_ELEMENT.  Parts[0] starts out holding the whole value as an
          // integer of the same size.
  411     Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
  412                            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()),
  413                            Val);
  414     for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
  415       for (unsigned i = 0; i < NumParts; i += StepSize) {
  416         unsigned ThisBits = StepSize * PartBits / 2;
  417         EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
  418         SDValue &Part0 = Parts[i];
  419         SDValue &Part1 = Parts[i+StepSize/2];
  420
              // Part1 has to be extracted first: both extracts read the old value
              // of Part0, which the second assignment overwrites.
  421         Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
  422                             ThisVT, Part0,
  423                             DAG.getConstant(1, PtrVT));
  424         Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
  425                             ThisVT, Part0,
  426                             DAG.getConstant(0, PtrVT));
  427
              // At the final step, convert the integer pieces to PartVT when PartVT
              // is a different type of the same size.
  428         if (ThisBits == PartBits && ThisVT != PartVT) {
  429           Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
  430                                                 PartVT, Part0);
  431           Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
  432                                                 PartVT, Part1);
  433         }
  434       }
  435     }
  436
          // Reverse all of the parts, including any odd tail handled above.
  437     if (TLI.isBigEndian())
  438       std::reverse(Parts, Parts + OrigNumParts);
  439
  440     return;
  441   }
  442
  443   // Vector ValueVT.
  444   if (NumParts == 1) {
          // A single part: bit-convert when the sizes match, otherwise extract
          // the only element of a one-element vector.
  445     if (PartVT != ValueVT) {
  446       if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
  447         Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
  448       } else {
  449         assert(ValueVT.getVectorElementType() == PartVT &&
  450                ValueVT.getVectorNumElements() == 1 &&
  451                "Only trivial vector-to-scalar conversions should get here!");
  452         Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
  453                           PartVT, Val,
  454                           DAG.getConstant(0, PtrVT));
  455       }
  456     }
  457
  458     Parts[0] = Val;
  459     return;
  460   }
  461
  462   // Handle a multi-element vector.  The target reports how ValueVT breaks down
        // into NumIntermediates values of IntermediateVT, held in NumRegs registers
        // of RegisterVT.
  463   EVT IntermediateVT, RegisterVT;
  464   unsigned NumIntermediates;
  465   unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
  466                               IntermediateVT, NumIntermediates, RegisterVT);
  467   unsigned NumElements = ValueVT.getVectorNumElements();
  468
  469   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  470   NumParts = NumRegs; // Silence a compiler warning.
  471   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
  472
  473   // Split the vector into intermediate operands: subvectors when the
        // intermediate type is a vector, single elements otherwise.
  474   SmallVector<SDValue, 8> Ops(NumIntermediates);
  475   for (unsigned i = 0; i != NumIntermediates; ++i)
  476     if (IntermediateVT.isVector())
  477       Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
  478                            IntermediateVT, Val,
  479                            DAG.getConstant(i * (NumElements / NumIntermediates),
  480                                            PtrVT));
  481     else
  482       Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
  483                            IntermediateVT, Val,
  484                            DAG.getConstant(i, PtrVT));
  485
  486   // Split the intermediate operands into legal parts.
  487   if (NumParts == NumIntermediates) {
  488     // If the register was not expanded, promote or copy the value,
  489     // as appropriate.
  490     for (unsigned i = 0; i != NumParts; ++i)
  491       getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
  492   } else if (NumParts > 0) {
  493     // If the intermediate type was expanded, split each value into
  494     // Factor legal parts.
  495     assert(NumParts % NumIntermediates == 0 &&
  496            "Must expand into a divisible number of parts!");
  497     unsigned Factor = NumParts / NumIntermediates;
  498     for (unsigned i = 0; i != NumIntermediates; ++i)
  499       getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT);
  500   }
  501 }
 502
 503
 504 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
 505   AA = &aa;
 506   GFI = gfi;
 507   TD = DAG.getTarget().getTargetData();
 508 }
 509
 510 /// clear - Clear out the curret SelectionDAG and the associated
 511 /// state and prepare this SelectionDAGBuilder object to be used
 512 /// for a new block. This doesn't clear out information about
 513 /// additional blocks that are needed to complete switch lowering
 514 /// or PHI node updating; that information is cleared out as it is
 515 /// consumed.
 516 void SelectionDAGBuilder::clear() {
 517   NodeMap.clear();
 518   PendingLoads.clear();
 519   PendingExports.clear();
 520   EdgeMapping.clear();
 521   DAG.clear();
 522   CurDebugLoc = DebugLoc::getUnknownLoc();
 523   HasTailCall = false;
 524 }
 525
 526 /// getRoot - Return the current virtual root of the Selection DAG,
 527 /// flushing any PendingLoad items. This must be done before emitting
 528 /// a store or any other node that may need to be ordered after any
 529 /// prior load instructions.
 530 ///
 531 SDValue SelectionDAGBuilder::getRoot() {
 532   if (PendingLoads.empty())
 533     return DAG.getRoot();
 534
 535   if (PendingLoads.size() == 1) {
 536     SDValue Root = PendingLoads[0];
 537     DAG.setRoot(Root);
 538     PendingLoads.clear();
 539     return Root;
 540   }
 541
 542   // Otherwise, we have to make a token factor node.
 543   SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
 544                                &PendingLoads[0], PendingLoads.size());
 545   PendingLoads.clear();
 546   DAG.setRoot(Root);
 547   return Root;
 548 }
 549
 550 /// getControlRoot - Similar to getRoot, but instead of flushing all the
 551 /// PendingLoad items, flush all the PendingExports items. It is necessary
 552 /// to do this before emitting a terminator instruction.
 553 ///
 554 SDValue SelectionDAGBuilder::getControlRoot() {
 555   SDValue Root = DAG.getRoot();
 556
 557   if (PendingExports.empty())
 558     return Root;
 559
 560   // Turn all of the CopyToReg chains into one factored node.
 561   if (Root.getOpcode() != ISD::EntryToken) {
 562     unsigned i = 0, e = PendingExports.size();
 563     for (; i != e; ++i) {
 564       assert(PendingExports[i].getNode()->getNumOperands() > 1);
 565       if (PendingExports[i].getNode()->getOperand(0) == Root)
 566         break;  // Don't add the root if we already indirectly depend on it.
 567     }
 568
 569     if (i == e)
 570       PendingExports.push_back(Root);
 571   }
 572
 573   Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
 574                      &PendingExports[0],
 575                      PendingExports.size());
 576   PendingExports.clear();
 577   DAG.setRoot(Root);
 578   return Root;
 579 }
 580
      /// visit - Visit instruction I by forwarding to the opcode-based overload
      /// with the opcode of I.
  581 void SelectionDAGBuilder::visit(Instruction &I) {
  582   visit(I.getOpcode(), I);
  583 }
 584
      /// visit - Dispatch on Opcode to the matching visit<OPCODE> member function,
      /// passing I cast to the class that llvm/Instruction.def associates with
      /// that opcode.  SDNodeOrder is incremented before dispatching.  An opcode
      /// without a case reaches llvm_unreachable.
  585 void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
  586   // We're processing a new instruction.
  587   ++SDNodeOrder;
  588
  589   // Note: this doesn't use InstVisitor, because it has to work with
  590   // ConstantExpr's in addition to instructions.
  591   switch (Opcode) {
  592   default: llvm_unreachable("Unknown instruction type encountered!");
  593     // Build the switch statement using the Instruction.def file.  Each
          // HANDLE_INST entry of that file expands to one case label below.
  594 #define HANDLE_INST(NUM, OPCODE, CLASS) \
  595   case Instruction::OPCODE: return visit##OPCODE((CLASS&)I);
  596 #include "llvm/Instruction.def"
  597   }
  598 }
 599
      /// getValue - Return an SDValue for V.  A node already recorded for V in
      /// NodeMap is returned as it is.  Otherwise constants are materialized
      /// according to their kind, a static alloca becomes a frame index, and any
      /// other value is read from the register recorded for it in
      /// FuncInfo.ValueMap (which is asserted to exist).  Only some of the
      /// results are stored in NodeMap; see the individual cases below.  An empty
      /// SDValue is returned for a zero or undef aggregate without any values.
  600 SDValue SelectionDAGBuilder::getValue(const Value *V) {
  601   SDValue &N = NodeMap[V];
  602   if (N.getNode()) return N;
  603
  604   if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
  605     EVT VT = TLI.getValueType(V->getType(), true);
  606
          // Simple scalar constants are created directly and cached through N.
  607     if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
  608       return N = DAG.getConstant(*CI, VT);
  609
  610     if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
  611       return N = DAG.getGlobalAddress(GV, VT);
  612
  613     if (isa<ConstantPointerNull>(C))
  614       return N = DAG.getConstant(0, TLI.getPointerTy());
  615
  616     if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
  617       return N = DAG.getConstantFP(*CFP, VT);
  618
  619     if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
  620       return N = DAG.getUNDEF(VT);
  621
          // Constant expressions are lowered like the corresponding instruction.
          // The result is looked up in NodeMap again instead of through N.
          // NOTE(review): presumably because visit() may add entries to NodeMap
          // and N may then no longer be usable - confirm.
  622     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
  623       visit(CE->getOpcode(), *CE);
  624       SDValue N1 = NodeMap[V];
  625       assert(N1.getNode() && "visit didn't populate the ValueMap!");
  626       return N1;
  627     }
  628
          // Struct and array constants with explicit operands.  The merged result
          // is returned without being stored in NodeMap.
  629     if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
  630       SmallVector<SDValue, 4> Constants;
  631       for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
  632            OI != OE; ++OI) {
  633         SDNode *Val = getValue(*OI).getNode();
  634         // If the operand is an empty aggregate, there are no values.
  635         if (!Val) continue;
  636         // Add each leaf value from the operand to the Constants list
  637         // to form a flattened list of all the values.
  638         for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
  639           Constants.push_back(SDValue(Val, i));
  640       }
  641
  642       SDValue Res = DAG.getMergeValues(&Constants[0], Constants.size(),
  643                                        getCurDebugLoc());
  644       if (DisableScheduling)
  645         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
  646       return Res;
  647     }
  648
          // Any other constant of struct or array type must be all zeros or undef.
          // One zero or undef value is produced per flattened element type; this
          // result is not stored in NodeMap either.
  649     if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
  650       assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
  651              "Unknown struct or array constant!");
  652
  653       SmallVector<EVT, 4> ValueVTs;
  654       ComputeValueVTs(TLI, C->getType(), ValueVTs);
  655       unsigned NumElts = ValueVTs.size();
  656       if (NumElts == 0)
  657         return SDValue(); // empty struct
  658       SmallVector<SDValue, 4> Constants(NumElts);
  659       for (unsigned i = 0; i != NumElts; ++i) {
  660         EVT EltVT = ValueVTs[i];
  661         if (isa<UndefValue>(C))
  662           Constants[i] = DAG.getUNDEF(EltVT);
  663         else if (EltVT.isFloatingPoint())
  664           Constants[i] = DAG.getConstantFP(0, EltVT);
  665         else
  666           Constants[i] = DAG.getConstant(0, EltVT);
  667       }
  668
  669       SDValue Res = DAG.getMergeValues(&Constants[0], NumElts,
  670                                        getCurDebugLoc());
  671       if (DisableScheduling)
  672         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
  673       return Res;
  674     }
  675
          // Block addresses are returned without being stored in NodeMap.
  676     if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
  677       return DAG.getBlockAddress(BA, VT);
  678
          // Every remaining constant is expected to be of vector type (the cast
          // below checks this).
  679     const VectorType *VecTy = cast<VectorType>(V->getType());
  680     unsigned NumElements = VecTy->getNumElements();
  681
  682     // Now that we know the number and type of the elements, get that number of
  683     // elements into the Ops array based on what kind of constant it is.
  684     SmallVector<SDValue, 16> Ops;
  685     if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
  686       for (unsigned i = 0; i != NumElements; ++i)
  687         Ops.push_back(getValue(CP->getOperand(i)));
  688     } else {
  689       assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
  690       EVT EltVT = TLI.getValueType(VecTy->getElementType());
  691
            // An all-zero vector: use the same zero element NumElements times.
  692       SDValue Op;
  693       if (EltVT.isFloatingPoint())
  694         Op = DAG.getConstantFP(0, EltVT);
  695       else
  696         Op = DAG.getConstant(0, EltVT);
  697       Ops.assign(NumElements, Op);
  698     }
  699
  700     // Create a BUILD_VECTOR node.
  701     SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
  702                               VT, &Ops[0], Ops.size());
  703     if (DisableScheduling)
  704       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
  705
          // Cache the vector through a fresh NodeMap lookup rather than through N.
  706     return NodeMap[V] = Res;
  707   }
  708
  709   // If this is a static alloca, generate it as the frameindex instead of
  710   // computation.
  711   if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
  712     DenseMap<const AllocaInst*, int>::iterator SI =
  713       FuncInfo.StaticAllocaMap.find(AI);
  714     if (SI != FuncInfo.StaticAllocaMap.end())
  715       return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
  716   }
  717
        // Everything else must already have a register recorded for it in
        // FuncInfo.ValueMap; read the value from there, using the entry node of
        // the DAG as the chain.
  718   unsigned InReg = FuncInfo.ValueMap[V];
  719   assert(InReg && "Value not in map!");
  720
  721   RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
  722   SDValue Chain = DAG.getEntryNode();
  723   return RFV.getCopyFromRegs(DAG, getCurDebugLoc(),
  724                              SDNodeOrder, Chain, NULL);
  725 }
 726
 727 /// Get the EVTs and ArgFlags collections that represent the return type
 728 /// of the given function.  This does not require a DAG or a return value, and
 729 /// is suitable for use before any DAGs for the function are constructed.
 730 static void getReturnInfo(const Type* ReturnType,
 731                    Attributes attr, SmallVectorImpl<EVT> &OutVTs,
 732                    SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
 733                    TargetLowering &TLI,
 734                    SmallVectorImpl<uint64_t> *Offsets = 0) {
 735   SmallVector<EVT, 4> ValueVTs;
 736   ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
 737   unsigned NumValues = ValueVTs.size();
 738   if ( NumValues == 0 ) return;
 739
 740   for (unsigned j = 0, f = NumValues; j != f; ++j) {
 741     EVT VT = ValueVTs[j];
 742     ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 743
 744     if (attr & Attribute::SExt)
 745       ExtendKind = ISD::SIGN_EXTEND;
 746     else if (attr & Attribute::ZExt)
 747       ExtendKind = ISD::ZERO_EXTEND;
 748
 749     // FIXME: C calling convention requires the return type to be promoted to
 750     // at least 32-bit. But this is not necessary for non-C calling
 751     // conventions. The frontend should mark functions whose return values
 752     // require promoting with signext or zeroext attributes.
 753     if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
 754       EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
 755       if (VT.bitsLT(MinVT))
 756         VT = MinVT;
 757     }
 758
 759     unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
 760     EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
 761     // 'inreg' on function refers to return value
 762     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
 763     if (attr & Attribute::InReg)
 764       Flags.setInReg();
 765
 766     // Propagate extension type if any
 767     if (attr & Attribute::SExt)
 768       Flags.setSExt();
 769     else if (attr & Attribute::ZExt)
 770       Flags.setZExt();
 771
 772     for (unsigned i = 0; i < NumParts; ++i) {
 773       OutVTs.push_back(PartVT);
 774       OutFlags.push_back(Flags);
 775     }
 776   }
 777 }
 778
 779 void SelectionDAGBuilder::visitRet(ReturnInst &I) {
 780   SDValue Chain = getControlRoot();
 781   SmallVector<ISD::OutputArg, 8> Outs;
 782   FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
 783
 784   if (!FLI.CanLowerReturn) {
 785     unsigned DemoteReg = FLI.DemoteRegister;
 786     const Function *F = I.getParent()->getParent();
 787
 788     // Emit a store of the return value through the virtual register.
 789     // Leave Outs empty so that LowerReturn won't try to load return
 790     // registers the usual way.
 791     SmallVector<EVT, 1> PtrValueVTs;
 792     ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
 793                     PtrValueVTs);
 794
 795     SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
 796     SDValue RetOp = getValue(I.getOperand(0));
 797
 798     SmallVector<EVT, 4> ValueVTs;
 799     SmallVector<uint64_t, 4> Offsets;
 800     ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
 801     unsigned NumValues = ValueVTs.size();
 802
 803     SmallVector<SDValue, 4> Chains(NumValues);
 804     EVT PtrVT = PtrValueVTs[0];
 805     for (unsigned i = 0; i != NumValues; ++i) {
 806       SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
 807                                 DAG.getConstant(Offsets[i], PtrVT));
 808       Chains[i] =
 809         DAG.getStore(Chain, getCurDebugLoc(),
 810                      SDValue(RetOp.getNode(), RetOp.getResNo() + i),
 811                      Add, NULL, Offsets[i], false, 0);
 812
 813       if (DisableScheduling) {
 814         DAG.AssignOrdering(Add.getNode(), SDNodeOrder);
 815         DAG.AssignOrdering(Chains[i].getNode(), SDNodeOrder);
 816       }
 817     }
 818
 819     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
 820                         MVT::Other, &Chains[0], NumValues);
 821
 822     if (DisableScheduling)
 823       DAG.AssignOrdering(Chain.getNode(), SDNodeOrder);
 824   } else {
 825     for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
 826       SmallVector<EVT, 4> ValueVTs;
 827       ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
 828       unsigned NumValues = ValueVTs.size();
 829       if (NumValues == 0) continue;
 830
 831       SDValue RetOp = getValue(I.getOperand(i));
 832       for (unsigned j = 0, f = NumValues; j != f; ++j) {
 833         EVT VT = ValueVTs[j];
 834
 835         ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 836
 837         const Function *F = I.getParent()->getParent();
 838         if (F->paramHasAttr(0, Attribute::SExt))
 839           ExtendKind = ISD::SIGN_EXTEND;
 840         else if (F->paramHasAttr(0, Attribute::ZExt))
 841           ExtendKind = ISD::ZERO_EXTEND;
 842
 843         // FIXME: C calling convention requires the return type to be promoted to
 844         // at least 32-bit. But this is not necessary for non-C calling
 845         // conventions. The frontend should mark functions whose return values
 846         // require promoting with signext or zeroext attributes.
 847         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
 848           EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
 849           if (VT.bitsLT(MinVT))
 850             VT = MinVT;
 851         }
 852
 853         unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
 854         EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
 855         SmallVector<SDValue, 4> Parts(NumParts);
 856         getCopyToParts(DAG, getCurDebugLoc(),
 857                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
 858                        &Parts[0], NumParts, PartVT, ExtendKind);
 859
 860         // 'inreg' on function refers to return value
 861         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
 862         if (F->paramHasAttr(0, Attribute::InReg))
 863           Flags.setInReg();
 864
 865         // Propagate extension type if any
 866         if (F->paramHasAttr(0, Attribute::SExt))
 867           Flags.setSExt();
 868         else if (F->paramHasAttr(0, Attribute::ZExt))
 869           Flags.setZExt();
 870
 871         for (unsigned i = 0; i < NumParts; ++i)
 872           Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
 873       }
 874     }
 875   }
 876
 877   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
 878   CallingConv::ID CallConv =
 879     DAG.getMachineFunction().getFunction()->getCallingConv();
 880   Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
 881                           Outs, getCurDebugLoc(), DAG);
 882
 883   // Verify that the target's LowerReturn behaved as expected.
 884   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
 885          "LowerReturn didn't return a valid chain!");
 886
 887   // Update the DAG with the new chain value resulting from return lowering.
 888   DAG.setRoot(Chain);
 889
 890   if (DisableScheduling)
 891     DAG.AssignOrdering(Chain.getNode(), SDNodeOrder);
 892 }
 893
 894 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
 895 /// created for it, emit nodes to copy the value into the virtual
 896 /// registers.
 897 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
 898   if (!V->use_empty()) {
 899     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
 900     if (VMI != FuncInfo.ValueMap.end())
 901       CopyValueToVirtualRegister(V, VMI->second);
 902   }
 903 }
 904
 905 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
 906 /// the current basic block, add it to ValueMap now so that we'll get a
 907 /// CopyTo/FromReg.
 908 void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
 909   // No need to export constants.
 910   if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
 911
 912   // Already exported?
 913   if (FuncInfo.isExportedInst(V)) return;
 914
 915   unsigned Reg = FuncInfo.InitializeRegForValue(V);
 916   CopyValueToVirtualRegister(V, Reg);
 917 }
 918
 919 bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
 920                                                      const BasicBlock *FromBB) {
 921   // The operands of the setcc have to be in this block.  We don't know
 922   // how to export them from some other block.
 923   if (Instruction *VI = dyn_cast<Instruction>(V)) {
 924     // Can export from current BB.
 925     if (VI->getParent() == FromBB)
 926       return true;
 927
 928     // Is already exported, noop.
 929     return FuncInfo.isExportedInst(V);
 930   }
 931
 932   // If this is an argument, we can export it if the BB is the entry block or
 933   // if it is already exported.
 934   if (isa<Argument>(V)) {
 935     if (FromBB == &FromBB->getParent()->getEntryBlock())
 936       return true;
 937
 938     // Otherwise, can only export this if it is already exported.
 939     return FuncInfo.isExportedInst(V);
 940   }
 941
 942   // Otherwise, constants can always be exported.
 943   return true;
 944 }
 945
 946 static bool InBlock(const Value *V, const BasicBlock *BB) {
 947   if (const Instruction *I = dyn_cast<Instruction>(V))
 948     return I->getParent() == BB;
 949   return true;
 950 }
 951
 952 /// getFCmpCondCode - Return the ISD condition code corresponding to
 953 /// the given LLVM IR floating-point condition code.  This includes
 954 /// consideration of global floating-point math flags.
 955 ///
 956 static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
 957   ISD::CondCode FPC, FOC;
 958   switch (Pred) {
 959   case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
 960   case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
 961   case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
 962   case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
 963   case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
 964   case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
 965   case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
 966   case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
 967   case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
 968   case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
 969   case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
 970   case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
 971   case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
 972   case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
 973   case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
 974   case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
 975   default:
 976     llvm_unreachable("Invalid FCmp predicate opcode!");
 977     FOC = FPC = ISD::SETFALSE;
 978     break;
 979   }
 980   if (FiniteOnlyFPMath())
 981     return FOC;
 982   else
 983     return FPC;
 984 }
 985
 986 /// getICmpCondCode - Return the ISD condition code corresponding to
 987 /// the given LLVM IR integer condition code.
 988 ///
 989 static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
 990   switch (Pred) {
 991   case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
 992   case ICmpInst::ICMP_NE:  return ISD::SETNE;
 993   case ICmpInst::ICMP_SLE: return ISD::SETLE;
 994   case ICmpInst::ICMP_ULE: return ISD::SETULE;
 995   case ICmpInst::ICMP_SGE: return ISD::SETGE;
 996   case ICmpInst::ICMP_UGE: return ISD::SETUGE;
 997   case ICmpInst::ICMP_SLT: return ISD::SETLT;
 998   case ICmpInst::ICMP_ULT: return ISD::SETULT;
 999   case ICmpInst::ICMP_SGT: return ISD::SETGT;
1000   case ICmpInst::ICMP_UGT: return ISD::SETUGT;
1001   default:
1002     llvm_unreachable("Invalid ICmp predicate opcode!");
1003     return ISD::SETNE;
1004   }
1005 }
1006
1007 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1008 /// This function emits a branch and is used at the leaves of an OR or an
1009 /// AND operator tree.
1010 ///
1011 void
1012 SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
1013                                                   MachineBasicBlock *TBB,
1014                                                   MachineBasicBlock *FBB,
1015                                                   MachineBasicBlock *CurBB) {
1016   const BasicBlock *BB = CurBB->getBasicBlock();
1017
1018   // If the leaf of the tree is a comparison, merge the condition into
1019   // the caseblock.
1020   if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1021     // The operands of the cmp have to be in this block.  We don't know
1022     // how to export them from some other block.  If this is the first block
1023     // of the sequence, no exporting is needed.
1024     if (CurBB == CurMBB ||
1025         (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1026          isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1027       ISD::CondCode Condition;
1028       if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1029         Condition = getICmpCondCode(IC->getPredicate());
1030       } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
1031         Condition = getFCmpCondCode(FC->getPredicate());
1032       } else {
1033         Condition = ISD::SETEQ; // silence warning.
1034         llvm_unreachable("Unknown compare instruction");
1035       }
1036
1037       CaseBlock CB(Condition, BOp->getOperand(0),
1038                    BOp->getOperand(1), NULL, TBB, FBB, CurBB);
1039       SwitchCases.push_back(CB);
1040       return;
1041     }
1042   }
1043
1044   // Create a CaseBlock record representing this branch.
1045   CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
1046                NULL, TBB, FBB, CurBB);
1047   SwitchCases.push_back(CB);
1048 }
1049
1050 /// FindMergedConditions - If Cond is an expression like
1051 void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
1052                                                MachineBasicBlock *TBB,
1053                                                MachineBasicBlock *FBB,
1054                                                MachineBasicBlock *CurBB,
1055                                                unsigned Opc) {
1056   // If this node is not part of the or/and tree, emit it as a branch.
1057   Instruction *BOp = dyn_cast<Instruction>(Cond);
1058   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1059       (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
1060       BOp->getParent() != CurBB->getBasicBlock() ||
1061       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1062       !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1063     EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
1064     return;
1065   }
1066
1067   //  Create TmpBB after CurBB.
1068   MachineFunction::iterator BBI = CurBB;
1069   MachineFunction &MF = DAG.getMachineFunction();
1070   MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1071   CurBB->getParent()->insert(++BBI, TmpBB);
1072
1073   if (Opc == Instruction::Or) {
1074     // Codegen X | Y as:
1075     //   jmp_if_X TBB
1076     //   jmp TmpBB
1077     // TmpBB:
1078     //   jmp_if_Y TBB
1079     //   jmp FBB
1080     //
1081
1082     // Emit the LHS condition.
1083     FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
1084
1085     // Emit the RHS condition into TmpBB.
1086     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
1087   } else {
1088     assert(Opc == Instruction::And && "Unknown merge op!");
1089     // Codegen X & Y as:
1090     //   jmp_if_X TmpBB
1091     //   jmp FBB
1092     // TmpBB:
1093     //   jmp_if_Y TBB
1094     //   jmp FBB
1095     //
1096     //  This requires creation of TmpBB after CurBB.
1097
1098     // Emit the LHS condition.
1099     FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
1100
1101     // Emit the RHS condition into TmpBB.
1102     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
1103   }
1104 }
1105
1106 /// If the set of cases should be emitted as a series of branches, return true.
1107 /// If we should emit this as a bunch of and/or'd together conditions, return
1108 /// false.
1109 bool
1110 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
1111   if (Cases.size() != 2) return true;
1112
1113   // If this is two comparisons of the same values or'd or and'd together, they
1114   // will get folded into a single comparison, so don't emit two blocks.
1115   if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1116        Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1117       (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1118        Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1119     return false;
1120   }
1121
1122   return true;
1123 }
1124
1125 void SelectionDAGBuilder::visitBr(BranchInst &I) {
1126   // Update machine-CFG edges.
1127   MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1128
1129   // Figure out which block is immediately after the current one.
1130   MachineBasicBlock *NextBlock = 0;
1131   MachineFunction::iterator BBI = CurMBB;
1132   if (++BBI != FuncInfo.MF->end())
1133     NextBlock = BBI;
1134
1135   if (I.isUnconditional()) {
1136     // Update machine-CFG edges.
1137     CurMBB->addSuccessor(Succ0MBB);
1138
1139     // If this is not a fall-through branch, emit the branch.
1140     if (Succ0MBB != NextBlock) {
1141       SDValue V = DAG.getNode(ISD::BR, getCurDebugLoc(),
1142                               MVT::Other, getControlRoot(),
1143                               DAG.getBasicBlock(Succ0MBB));
1144       DAG.setRoot(V);
1145
1146       if (DisableScheduling)
1147         DAG.AssignOrdering(V.getNode(), SDNodeOrder);
1148     }
1149
1150     return;
1151   }
1152
1153   // If this condition is one of the special cases we handle, do special stuff
1154   // now.
1155   Value *CondVal = I.getCondition();
1156   MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1157
1158   // If this is a series of conditions that are or'd or and'd together, emit
1159   // this as a sequence of branches instead of setcc's with and/or operations.
1160   // For example, instead of something like:
1161   //     cmp A, B
1162   //     C = seteq
1163   //     cmp D, E
1164   //     F = setle
1165   //     or C, F
1166   //     jnz foo
1167   // Emit:
1168   //     cmp A, B
1169   //     je foo
1170   //     cmp D, E
1171   //     jle foo
1172   //
1173   if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1174     if (BOp->hasOneUse() &&
1175         (BOp->getOpcode() == Instruction::And ||
1176          BOp->getOpcode() == Instruction::Or)) {
1177       FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
1178       // If the compares in later blocks need to use values not currently
1179       // exported from this block, export them now.  This block should always
1180       // be the first entry.
1181       assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
1182
1183       // Allow some cases to be rejected.
1184       if (ShouldEmitAsBranches(SwitchCases)) {
1185         for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1186           ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1187           ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1188         }
1189
1190         // Emit the branch for this block.
1191         visitSwitchCase(SwitchCases[0]);
1192         SwitchCases.erase(SwitchCases.begin());
1193         return;
1194       }
1195
1196       // Okay, we decided not to do this, remove any inserted MBB's and clear
1197       // SwitchCases.
1198       for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1199         FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1200
1201       SwitchCases.clear();
1202     }
1203   }
1204
1205   // Create a CaseBlock record representing this branch.
1206   CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1207                NULL, Succ0MBB, Succ1MBB, CurMBB);
1208
1209   // Use visitSwitchCase to actually insert the fast branch sequence for this
1210   // cond branch.
1211   visitSwitchCase(CB);
1212 }
1213
1214 /// visitSwitchCase - Emits the necessary code to represent a single node in
1215 /// the binary search tree resulting from lowering a switch instruction.
1216 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
1217   SDValue Cond;
1218   SDValue CondLHS = getValue(CB.CmpLHS);
1219   DebugLoc dl = getCurDebugLoc();
1220
1221   // Build the setcc now.
1222   if (CB.CmpMHS == NULL) {
1223     // Fold "(X == true)" to X and "(X == false)" to !X to
1224     // handle common cases produced by branch lowering.
1225     if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1226         CB.CC == ISD::SETEQ)
1227       Cond = CondLHS;
1228     else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1229              CB.CC == ISD::SETEQ) {
1230       SDValue True = DAG.getConstant(1, CondLHS.getValueType());
1231       Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1232     } else
1233       Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1234   } else {
1235     assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1236
1237     const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1238     const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
1239
1240     SDValue CmpOp = getValue(CB.CmpMHS);
1241     EVT VT = CmpOp.getValueType();
1242
1243     if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
1244       Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
1245                           ISD::SETLE);
1246     } else {
1247       SDValue SUB = DAG.getNode(ISD::SUB, dl,
1248                                 VT, CmpOp, DAG.getConstant(Low, VT));
1249       Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1250                           DAG.getConstant(High-Low, VT), ISD::SETULE);
1251     }
1252   }
1253
1254   if (DisableScheduling)
1255     DAG.AssignOrdering(Cond.getNode(), SDNodeOrder);
1256
1257   // Update successor info
1258   CurMBB->addSuccessor(CB.TrueBB);
1259   CurMBB->addSuccessor(CB.FalseBB);
1260
1261   // Set NextBlock to be the MBB immediately after the current one, if any.
1262   // This is used to avoid emitting unnecessary branches to the next block.
1263   MachineBasicBlock *NextBlock = 0;
1264   MachineFunction::iterator BBI = CurMBB;
1265   if (++BBI != FuncInfo.MF->end())
1266     NextBlock = BBI;
1267
1268   // If the lhs block is the next block, invert the condition so that we can
1269   // fall through to the lhs instead of the rhs block.
1270   if (CB.TrueBB == NextBlock) {
1271     std::swap(CB.TrueBB, CB.FalseBB);
1272     SDValue True = DAG.getConstant(1, Cond.getValueType());
1273     Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
1274
1275     if (DisableScheduling)
1276       DAG.AssignOrdering(Cond.getNode(), SDNodeOrder);
1277   }
1278
1279   SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1280                                MVT::Other, getControlRoot(), Cond,
1281                                DAG.getBasicBlock(CB.TrueBB));
1282
1283   if (DisableScheduling)
1284     DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder);
1285
1286   // If the branch was constant folded, fix up the CFG.
1287   if (BrCond.getOpcode() == ISD::BR) {
1288     CurMBB->removeSuccessor(CB.FalseBB);
1289   } else {
1290     // Otherwise, go ahead and insert the false branch.
1291     if (BrCond == getControlRoot())
1292       CurMBB->removeSuccessor(CB.TrueBB);
1293
1294     if (CB.FalseBB != NextBlock) {
1295       BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
1296                            DAG.getBasicBlock(CB.FalseBB));
1297
1298       if (DisableScheduling)
1299         DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder);
1300     }
1301   }
1302
1303   DAG.setRoot(BrCond);
1304 }
1305
1306 /// visitJumpTable - Emit JumpTable node in the current MBB
1307 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1308   // Emit the code for the jump table
1309   assert(JT.Reg != -1U && "Should lower JT Header first!");
1310   EVT PTy = TLI.getPointerTy();
1311   SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
1312                                      JT.Reg, PTy);
1313   SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1314   SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
1315                                     MVT::Other, Index.getValue(1),
1316                                     Table, Index);
1317   DAG.setRoot(BrJumpTable);
1318
1319   if (DisableScheduling) {
1320     DAG.AssignOrdering(Index.getNode(), SDNodeOrder);
1321     DAG.AssignOrdering(Table.getNode(), SDNodeOrder);
1322     DAG.AssignOrdering(BrJumpTable.getNode(), SDNodeOrder);
1323   }
1324 }
1325
1326 /// visitJumpTableHeader - This function emits necessary code to produce index
1327 /// in the JumpTable from switch case.
1328 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
1329                                                JumpTableHeader &JTH) {
1330   // Subtract the lowest switch case value from the value being switched on and
1331   // conditional branch to default mbb if the result is greater than the
1332   // difference between smallest and largest cases.
1333   SDValue SwitchOp = getValue(JTH.SValue);
1334   EVT VT = SwitchOp.getValueType();
1335   SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1336                             DAG.getConstant(JTH.First, VT));
1337
1338   // The SDNode we just created, which holds the value being switched on minus
1339   // the the smallest case value, needs to be copied to a virtual register so it
1340   // can be used as an index into the jump table in a subsequent basic block.
1341   // This value may be smaller or larger than the target's pointer type, and
1342   // therefore require extension or truncating.
1343   SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
1344
1345   unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
1346   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1347                                     JumpTableReg, SwitchOp);
1348   JT.Reg = JumpTableReg;
1349
1350   // Emit the range check for the jump table, and branch to the default block
1351   // for the switch statement if the value being switched on exceeds the largest
1352   // case in the switch.
1353   SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
1354                              TLI.getSetCCResultType(Sub.getValueType()), Sub,
1355                              DAG.getConstant(JTH.Last-JTH.First,VT),
1356                              ISD::SETUGT);
1357
1358   if (DisableScheduling) {
1359     DAG.AssignOrdering(Sub.getNode(), SDNodeOrder);
1360     DAG.AssignOrdering(SwitchOp.getNode(), SDNodeOrder);
1361     DAG.AssignOrdering(CopyTo.getNode(), SDNodeOrder);
1362     DAG.AssignOrdering(CMP.getNode(), SDNodeOrder);
1363   }
1364
1365   // Set NextBlock to be the MBB immediately after the current one, if any.
1366   // This is used to avoid emitting unnecessary branches to the next block.
1367   MachineBasicBlock *NextBlock = 0;
1368   MachineFunction::iterator BBI = CurMBB;
1369
1370   if (++BBI != FuncInfo.MF->end())
1371     NextBlock = BBI;
1372
1373   SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1374                                MVT::Other, CopyTo, CMP,
1375                                DAG.getBasicBlock(JT.Default));
1376
1377   if (DisableScheduling)
1378     DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder);
1379
1380   if (JT.MBB != NextBlock) {
1381     BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
1382                          DAG.getBasicBlock(JT.MBB));
1383
1384     if (DisableScheduling)
1385       DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder);
1386   }
1387
1388   DAG.setRoot(BrCond);
1389 }
1390
1391 /// visitBitTestHeader - This function emits necessary code to produce value
1392 /// suitable for "bit tests"
1393 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
1394   // Subtract the minimum value
1395   SDValue SwitchOp = getValue(B.SValue);
1396   EVT VT = SwitchOp.getValueType();
1397   SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1398                             DAG.getConstant(B.First, VT));
1399
1400   // Check range
1401   SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
1402                                   TLI.getSetCCResultType(Sub.getValueType()),
1403                                   Sub, DAG.getConstant(B.Range, VT),
1404                                   ISD::SETUGT);
1405
1406   SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
1407                                        TLI.getPointerTy());
1408
1409   B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
1410   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1411                                     B.Reg, ShiftOp);
1412
1413   if (DisableScheduling) {
1414     DAG.AssignOrdering(Sub.getNode(), SDNodeOrder);
1415     DAG.AssignOrdering(RangeCmp.getNode(), SDNodeOrder);
1416     DAG.AssignOrdering(ShiftOp.getNode(), SDNodeOrder);
1417     DAG.AssignOrdering(CopyTo.getNode(), SDNodeOrder);
1418   }
1419
1420   // Set NextBlock to be the MBB immediately after the current one, if any.
1421   // This is used to avoid emitting unnecessary branches to the next block.
1422   MachineBasicBlock *NextBlock = 0;
1423   MachineFunction::iterator BBI = CurMBB;
1424   if (++BBI != FuncInfo.MF->end())
1425     NextBlock = BBI;
1426
1427   MachineBasicBlock* MBB = B.Cases[0].ThisBB;
1428
1429   CurMBB->addSuccessor(B.Default);
1430   CurMBB->addSuccessor(MBB);
1431
1432   SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1433                                 MVT::Other, CopyTo, RangeCmp,
1434                                 DAG.getBasicBlock(B.Default));
1435
1436   if (DisableScheduling)
1437     DAG.AssignOrdering(BrRange.getNode(), SDNodeOrder);
1438
1439   if (MBB != NextBlock) {
1440     BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
1441                           DAG.getBasicBlock(MBB));
1442
1443     if (DisableScheduling)
1444       DAG.AssignOrdering(BrRange.getNode(), SDNodeOrder);
1445   }
1446
1447   DAG.setRoot(BrRange);
1448 }
1449
1450 /// visitBitTestCase - this function produces one "bit test"
1451 void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
1452                                            unsigned Reg,
1453                                            BitTestCase &B) {
1454   // Make desired shift
1455   SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
1456                                        TLI.getPointerTy());
1457   SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
1458                                   TLI.getPointerTy(),
1459                                   DAG.getConstant(1, TLI.getPointerTy()),
1460                                   ShiftOp);
1461
1462   // Emit bit tests and jumps
1463   SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
1464                               TLI.getPointerTy(), SwitchVal,
1465                               DAG.getConstant(B.Mask, TLI.getPointerTy()));
1466   SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
1467                                 TLI.getSetCCResultType(AndOp.getValueType()),
1468                                 AndOp, DAG.getConstant(0, TLI.getPointerTy()),
1469                                 ISD::SETNE);
1470
1471   if (DisableScheduling) {
1472     DAG.AssignOrdering(ShiftOp.getNode(), SDNodeOrder);
1473     DAG.AssignOrdering(SwitchVal.getNode(), SDNodeOrder);
1474     DAG.AssignOrdering(AndOp.getNode(), SDNodeOrder);
1475     DAG.AssignOrdering(AndCmp.getNode(), SDNodeOrder);
1476   }
1477
1478   CurMBB->addSuccessor(B.TargetBB);
1479   CurMBB->addSuccessor(NextMBB);
1480
1481   SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1482                               MVT::Other, getControlRoot(),
1483                               AndCmp, DAG.getBasicBlock(B.TargetBB));
1484
1485   if (DisableScheduling)
1486     DAG.AssignOrdering(BrAnd.getNode(), SDNodeOrder);
1487
1488   // Set NextBlock to be the MBB immediately after the current one, if any.
1489   // This is used to avoid emitting unnecessary branches to the next block.
1490   MachineBasicBlock *NextBlock = 0;
1491   MachineFunction::iterator BBI = CurMBB;
1492   if (++BBI != FuncInfo.MF->end())
1493     NextBlock = BBI;
1494
1495   if (NextMBB != NextBlock) {
1496     BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
1497                         DAG.getBasicBlock(NextMBB));
1498
1499     if (DisableScheduling)
1500       DAG.AssignOrdering(BrAnd.getNode(), SDNodeOrder);
1501   }
1502
1503   DAG.setRoot(BrAnd);
1504 }
1505
1506 void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
1507   // Retrieve successors.
1508   MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
1509   MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
1510
1511   const Value *Callee(I.getCalledValue());
1512   if (isa<InlineAsm>(Callee))
1513     visitInlineAsm(&I);
1514   else
1515     LowerCallTo(&I, getValue(Callee), false, LandingPad);
1516
1517   // If the value of the invoke is used outside of its defining block, make it
1518   // available as a virtual register.
1519   CopyToExportRegsIfNeeded(&I);
1520
1521   // Update successor info
1522   CurMBB->addSuccessor(Return);
1523   CurMBB->addSuccessor(LandingPad);
1524
1525   // Drop into normal successor.
1526   SDValue Branch = DAG.getNode(ISD::BR, getCurDebugLoc(),
1527                                MVT::Other, getControlRoot(),
1528                                DAG.getBasicBlock(Return));
1529   DAG.setRoot(Branch);
1530
1531   if (DisableScheduling)
1532     DAG.AssignOrdering(Branch.getNode(), SDNodeOrder);
1533 }
1534
1535 void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
1536 }
1537
1538 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
1539 /// small case ranges).
1540 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
1541                                                  CaseRecVector& WorkList,
1542                                                  Value* SV,
1543                                                  MachineBasicBlock* Default) {
1544   Case& BackCase  = *(CR.Range.second-1);
1545
1546   // Size is the number of Cases represented by this range.
1547   size_t Size = CR.Range.second - CR.Range.first;
1548   if (Size > 3)
1549     return false;
1550
1551   // Get the MachineFunction which holds the current MBB.  This is used when
1552   // inserting any additional MBBs necessary to represent the switch.
1553   MachineFunction *CurMF = FuncInfo.MF;
1554
1555   // Figure out which block is immediately after the current one.
1556   MachineBasicBlock *NextBlock = 0;
1557   MachineFunction::iterator BBI = CR.CaseBB;
1558
1559   if (++BBI != FuncInfo.MF->end())
1560     NextBlock = BBI;
1561
1562   // TODO: If any two of the cases has the same destination, and if one value
1563   // is the same as the other, but has one bit unset that the other has set,
1564   // use bit manipulation to do two compares at once.  For example:
1565   // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
1566
1567   // Rearrange the case blocks so that the last one falls through if possible.
1568   if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
1569     // The last case block won't fall through into 'NextBlock' if we emit the
1570     // branches in this order.  See if rearranging a case value would help.
1571     for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
1572       if (I->BB == NextBlock) {
1573         std::swap(*I, BackCase);
1574         break;
1575       }
1576     }
1577   }
1578
1579   // Create a CaseBlock record representing a conditional branch to
1580   // the Case's target mbb if the value being switched on SV is equal
1581   // to C.
1582   MachineBasicBlock *CurBlock = CR.CaseBB;
1583   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
1584     MachineBasicBlock *FallThrough;
1585     if (I != E-1) {
1586       FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
1587       CurMF->insert(BBI, FallThrough);
1588
1589       // Put SV in a virtual register to make it available from the new blocks.
1590       ExportFromCurrentBlock(SV);
1591     } else {
1592       // If the last case doesn't match, go to the default block.
1593       FallThrough = Default;
1594     }
1595
1596     Value *RHS, *LHS, *MHS;
1597     ISD::CondCode CC;
1598     if (I->High == I->Low) {
1599       // This is just small small case range :) containing exactly 1 case
1600       CC = ISD::SETEQ;
1601       LHS = SV; RHS = I->High; MHS = NULL;
1602     } else {
1603       CC = ISD::SETLE;
1604       LHS = I->Low; MHS = SV; RHS = I->High;
1605     }
1606     CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
1607
1608     // If emitting the first comparison, just call visitSwitchCase to emit the
1609     // code into the current block.  Otherwise, push the CaseBlock onto the
1610     // vector to be later processed by SDISel, and insert the node's MBB
1611     // before the next MBB.
1612     if (CurBlock == CurMBB)
1613       visitSwitchCase(CB);
1614     else
1615       SwitchCases.push_back(CB);
1616
1617     CurBlock = FallThrough;
1618   }
1619
1620   return true;
1621 }
1622
1623 static inline bool areJTsAllowed(const TargetLowering &TLI) {
1624   return !DisableJumpTables &&
1625           (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1626            TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
1627 }
1628
1629 static APInt ComputeRange(const APInt &First, const APInt &Last) {
1630   APInt LastExt(Last), FirstExt(First);
1631   uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
1632   LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
1633   return (LastExt - FirstExt + 1ULL);
1634 }
1635
1636 /// handleJTSwitchCase - Emit jumptable for current switch case range
1637 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
1638                                              CaseRecVector& WorkList,
1639                                              Value* SV,
1640                                              MachineBasicBlock* Default) {
1641   Case& FrontCase = *CR.Range.first;
1642   Case& BackCase  = *(CR.Range.second-1);
1643
1644   const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1645   const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1646
1647   APInt TSize(First.getBitWidth(), 0);
1648   for (CaseItr I = CR.Range.first, E = CR.Range.second;
1649        I!=E; ++I)
1650     TSize += I->size();
1651
1652   if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
1653     return false;
1654
1655   APInt Range = ComputeRange(First, Last);
1656   double Density = TSize.roundToDouble() / Range.roundToDouble();
1657   if (Density < 0.4)
1658     return false;
1659
1660   DEBUG(errs() << "Lowering jump table\n"
1661                << "First entry: " << First << ". Last entry: " << Last << '\n'
1662                << "Range: " << Range
1663                << "Size: " << TSize << ". Density: " << Density << "\n\n");
1664
1665   // Get the MachineFunction which holds the current MBB.  This is used when
1666   // inserting any additional MBBs necessary to represent the switch.
1667   MachineFunction *CurMF = FuncInfo.MF;
1668
1669   // Figure out which block is immediately after the current one.
1670   MachineFunction::iterator BBI = CR.CaseBB;
1671   ++BBI;
1672
1673   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1674
1675   // Create a new basic block to hold the code for loading the address
1676   // of the jump table, and jumping to it.  Update successor information;
1677   // we will either branch to the default case for the switch, or the jump
1678   // table.
1679   MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1680   CurMF->insert(BBI, JumpTableBB);
1681   CR.CaseBB->addSuccessor(Default);
1682   CR.CaseBB->addSuccessor(JumpTableBB);
1683
1684   // Build a vector of destination BBs, corresponding to each target
1685   // of the jump table. If the value of the jump table slot corresponds to
1686   // a case statement, push the case's BB onto the vector, otherwise, push
1687   // the default BB.
1688   std::vector<MachineBasicBlock*> DestBBs;
1689   APInt TEI = First;
1690   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
1691     const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
1692     const APInt& High = cast<ConstantInt>(I->High)->getValue();
1693
1694     if (Low.sle(TEI) && TEI.sle(High)) {
1695       DestBBs.push_back(I->BB);
1696       if (TEI==High)
1697         ++I;
1698     } else {
1699       DestBBs.push_back(Default);
1700     }
1701   }
1702
1703   // Update successor info. Add one edge to each unique successor.
1704   BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
1705   for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
1706          E = DestBBs.end(); I != E; ++I) {
1707     if (!SuccsHandled[(*I)->getNumber()]) {
1708       SuccsHandled[(*I)->getNumber()] = true;
1709       JumpTableBB->addSuccessor(*I);
1710     }
1711   }
1712
1713   // Create a jump table index for this jump table, or return an existing
1714   // one.
1715   unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
1716
1717   // Set the jump table information so that we can codegen it as a second
1718   // MachineBasicBlock
1719   JumpTable JT(-1U, JTI, JumpTableBB, Default);
1720   JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
1721   if (CR.CaseBB == CurMBB)
1722     visitJumpTableHeader(JT, JTH);
1723
1724   JTCases.push_back(JumpTableBlock(JTH, JT));
1725
1726   return true;
1727 }
1728
1729 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into
1730 /// 2 subtrees.
1731 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
1732                                                   CaseRecVector& WorkList,
1733                                                   Value* SV,
1734                                                   MachineBasicBlock* Default) {
1735   // Get the MachineFunction which holds the current MBB.  This is used when
1736   // inserting any additional MBBs necessary to represent the switch.
1737   MachineFunction *CurMF = FuncInfo.MF;
1738
1739   // Figure out which block is immediately after the current one.
1740   MachineFunction::iterator BBI = CR.CaseBB;
1741   ++BBI;
1742
1743   Case& FrontCase = *CR.Range.first;
1744   Case& BackCase  = *(CR.Range.second-1);
1745   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1746
1747   // Size is the number of Cases represented by this range.
1748   unsigned Size = CR.Range.second - CR.Range.first;
1749
1750   const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1751   const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1752   double FMetric = 0;
1753   CaseItr Pivot = CR.Range.first + Size/2;
1754
1755   // Select optimal pivot, maximizing sum density of LHS and RHS. This will
1756   // (heuristically) allow us to emit JumpTable's later.
1757   APInt TSize(First.getBitWidth(), 0);
1758   for (CaseItr I = CR.Range.first, E = CR.Range.second;
1759        I!=E; ++I)
1760     TSize += I->size();
1761
1762   APInt LSize = FrontCase.size();
1763   APInt RSize = TSize-LSize;
1764   DEBUG(errs() << "Selecting best pivot: \n"
1765                << "First: " << First << ", Last: " << Last <<'\n'
1766                << "LSize: " << LSize << ", RSize: " << RSize << '\n');
1767   for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
1768        J!=E; ++I, ++J) {
1769     const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
1770     const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
1771     APInt Range = ComputeRange(LEnd, RBegin);
1772     assert((Range - 2ULL).isNonNegative() &&
1773            "Invalid case distance");
1774     double LDensity = (double)LSize.roundToDouble() /
1775                            (LEnd - First + 1ULL).roundToDouble();
1776     double RDensity = (double)RSize.roundToDouble() /
1777                            (Last - RBegin + 1ULL).roundToDouble();
1778     double Metric = Range.logBase2()*(LDensity+RDensity);
1779     // Should always split in some non-trivial place
1780     DEBUG(errs() <<"=>Step\n"
1781                  << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
1782                  << "LDensity: " << LDensity
1783                  << ", RDensity: " << RDensity << '\n'
1784                  << "Metric: " << Metric << '\n');
1785     if (FMetric < Metric) {
1786       Pivot = J;
1787       FMetric = Metric;
1788       DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
1789     }
1790
1791     LSize += J->size();
1792     RSize -= J->size();
1793   }
1794   if (areJTsAllowed(TLI)) {
1795     // If our case is dense we *really* should handle it earlier!
1796     assert((FMetric > 0) && "Should handle dense range earlier!");
1797   } else {
1798     Pivot = CR.Range.first + Size/2;
1799   }
1800
1801   CaseRange LHSR(CR.Range.first, Pivot);
1802   CaseRange RHSR(Pivot, CR.Range.second);
1803   Constant *C = Pivot->Low;
1804   MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
1805
1806   // We know that we branch to the LHS if the Value being switched on is
1807   // less than the Pivot value, C.  We use this to optimize our binary
1808   // tree a bit, by recognizing that if SV is greater than or equal to the
1809   // LHS's Case Value, and that Case Value is exactly one less than the
1810   // Pivot's Value, then we can branch directly to the LHS's Target,
1811   // rather than creating a leaf node for it.
1812   if ((LHSR.second - LHSR.first) == 1 &&
1813       LHSR.first->High == CR.GE &&
1814       cast<ConstantInt>(C)->getValue() ==
1815       (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
1816     TrueBB = LHSR.first->BB;
1817   } else {
1818     TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1819     CurMF->insert(BBI, TrueBB);
1820     WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
1821
1822     // Put SV in a virtual register to make it available from the new blocks.
1823     ExportFromCurrentBlock(SV);
1824   }
1825
1826   // Similar to the optimization above, if the Value being switched on is
1827   // known to be less than the Constant CR.LT, and the current Case Value
1828   // is CR.LT - 1, then we can branch directly to the target block for
1829   // the current Case Value, rather than emitting a RHS leaf node for it.
1830   if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
1831       cast<ConstantInt>(RHSR.first->Low)->getValue() ==
1832       (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
1833     FalseBB = RHSR.first->BB;
1834   } else {
1835     FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1836     CurMF->insert(BBI, FalseBB);
1837     WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
1838
1839     // Put SV in a virtual register to make it available from the new blocks.
1840     ExportFromCurrentBlock(SV);
1841   }
1842
1843   // Create a CaseBlock record representing a conditional branch to
1844   // the LHS node if the value being switched on SV is less than C.
1845   // Otherwise, branch to LHS.
1846   CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
1847
1848   if (CR.CaseBB == CurMBB)
1849     visitSwitchCase(CB);
1850   else
1851     SwitchCases.push_back(CB);
1852
1853   return true;
1854 }
1855
1856 /// handleBitTestsSwitchCase - if current case range has few destination and
1857 /// range span less, than machine word bitwidth, encode case range into series
1858 /// of masks and emit bit tests with these masks.
1859 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
1860                                                    CaseRecVector& WorkList,
1861                                                    Value* SV,
1862                                                    MachineBasicBlock* Default){
1863   EVT PTy = TLI.getPointerTy();
1864   unsigned IntPtrBits = PTy.getSizeInBits();
1865
1866   Case& FrontCase = *CR.Range.first;
1867   Case& BackCase  = *(CR.Range.second-1);
1868
1869   // Get the MachineFunction which holds the current MBB.  This is used when
1870   // inserting any additional MBBs necessary to represent the switch.
1871   MachineFunction *CurMF = FuncInfo.MF;
1872
1873   // If target does not have legal shift left, do not emit bit tests at all.
1874   if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
1875     return false;
1876
1877   size_t numCmps = 0;
1878   for (CaseItr I = CR.Range.first, E = CR.Range.second;
1879        I!=E; ++I) {
1880     // Single case counts one, case range - two.
1881     numCmps += (I->Low == I->High ? 1 : 2);
1882   }
1883
1884   // Count unique destinations
1885   SmallSet<MachineBasicBlock*, 4> Dests;
1886   for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
1887     Dests.insert(I->BB);
1888     if (Dests.size() > 3)
1889       // Don't bother the code below, if there are too much unique destinations
1890       return false;
1891   }
1892   DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
1893                << "Total number of comparisons: " << numCmps << '\n');
1894
1895   // Compute span of values.
1896   const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
1897   const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
1898   APInt cmpRange = maxValue - minValue;
1899
1900   DEBUG(errs() << "Compare range: " << cmpRange << '\n'
1901                << "Low bound: " << minValue << '\n'
1902                << "High bound: " << maxValue << '\n');
1903
1904   if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
1905       (!(Dests.size() == 1 && numCmps >= 3) &&
1906        !(Dests.size() == 2 && numCmps >= 5) &&
1907        !(Dests.size() >= 3 && numCmps >= 6)))
1908     return false;
1909
1910   DEBUG(errs() << "Emitting bit tests\n");
1911   APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
1912
1913   // Optimize the case where all the case values fit in a
1914   // word without having to subtract minValue. In this case,
1915   // we can optimize away the subtraction.
1916   if (minValue.isNonNegative() &&
1917       maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
1918     cmpRange = maxValue;
1919   } else {
1920     lowBound = minValue;
1921   }
1922
1923   CaseBitsVector CasesBits;
1924   unsigned i, count = 0;
1925
1926   for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
1927     MachineBasicBlock* Dest = I->BB;
1928     for (i = 0; i < count; ++i)
1929       if (Dest == CasesBits[i].BB)
1930         break;
1931
1932     if (i == count) {
1933       assert((count < 3) && "Too much destinations to test!");
1934       CasesBits.push_back(CaseBits(0, Dest, 0));
1935       count++;
1936     }
1937
1938     const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
1939     const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
1940
1941     uint64_t lo = (lowValue - lowBound).getZExtValue();
1942     uint64_t hi = (highValue - lowBound).getZExtValue();
1943
1944     for (uint64_t j = lo; j <= hi; j++) {
1945       CasesBits[i].Mask |=  1ULL << j;
1946       CasesBits[i].Bits++;
1947     }
1948
1949   }
1950   std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
1951
1952   BitTestInfo BTC;
1953
1954   // Figure out which block is immediately after the current one.
1955   MachineFunction::iterator BBI = CR.CaseBB;
1956   ++BBI;
1957
1958   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1959
1960   DEBUG(errs() << "Cases:\n");
1961   for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
1962     DEBUG(errs() << "Mask: " << CasesBits[i].Mask
1963                  << ", Bits: " << CasesBits[i].Bits
1964                  << ", BB: " << CasesBits[i].BB << '\n');
1965
1966     MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1967     CurMF->insert(BBI, CaseBB);
1968     BTC.push_back(BitTestCase(CasesBits[i].Mask,
1969                               CaseBB,
1970                               CasesBits[i].BB));
1971
1972     // Put SV in a virtual register to make it available from the new blocks.
1973     ExportFromCurrentBlock(SV);
1974   }
1975
1976   BitTestBlock BTB(lowBound, cmpRange, SV,
1977                    -1U, (CR.CaseBB == CurMBB),
1978                    CR.CaseBB, Default, BTC);
1979
1980   if (CR.CaseBB == CurMBB)
1981     visitBitTestHeader(BTB);
1982
1983   BitTestCases.push_back(BTB);
1984
1985   return true;
1986 }
1987
1988 /// Clusterify - Transform simple list of Cases into list of CaseRange's
1989 size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
1990                                        const SwitchInst& SI) {
1991   size_t numCmps = 0;
1992
1993   // Start with "simple" cases
1994   for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
1995     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
1996     Cases.push_back(Case(SI.getSuccessorValue(i),
1997                          SI.getSuccessorValue(i),
1998                          SMBB));
1999   }
2000   std::sort(Cases.begin(), Cases.end(), CaseCmp());
2001
2002   // Merge case into clusters
2003   if (Cases.size() >= 2)
2004     // Must recompute end() each iteration because it may be
2005     // invalidated by erase if we hold on to it
2006     for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
2007       const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
2008       const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
2009       MachineBasicBlock* nextBB = J->BB;
2010       MachineBasicBlock* currentBB = I->BB;
2011
2012       // If the two neighboring cases go to the same destination, merge them
2013       // into a single case.
2014       if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
2015         I->High = J->High;
2016         J = Cases.erase(J);
2017       } else {
2018         I = J++;
2019       }
2020     }
2021
2022   for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
2023     if (I->Low != I->High)
2024       // A range counts double, since it requires two compares.
2025       ++numCmps;
2026   }
2027
2028   return numCmps;
2029 }
2030
2031 void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
2032   // Figure out which block is immediately after the current one.
2033   MachineBasicBlock *NextBlock = 0;
2034   MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2035
2036   // If there is only the default destination, branch to it if it is not the
2037   // next basic block.  Otherwise, just fall through.
2038   if (SI.getNumOperands() == 2) {
2039     // Update machine-CFG edges.
2040
2041     // If this is not a fall-through branch, emit the branch.
2042     CurMBB->addSuccessor(Default);
2043     if (Default != NextBlock) {
2044       SDValue Res = DAG.getNode(ISD::BR, getCurDebugLoc(),
2045                                 MVT::Other, getControlRoot(),
2046                                 DAG.getBasicBlock(Default));
2047       DAG.setRoot(Res);
2048
2049       if (DisableScheduling)
2050         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2051     }
2052
2053     return;
2054   }
2055
2056   // If there are any non-default case statements, create a vector of Cases
2057   // representing each one, and sort the vector so that we can efficiently
2058   // create a binary search tree from them.
2059   CaseVector Cases;
2060   size_t numCmps = Clusterify(Cases, SI);
2061   DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
2062                << ". Total compares: " << numCmps << '\n');
2063   numCmps = 0;
2064
2065   // Get the Value to be switched on and default basic blocks, which will be
2066   // inserted into CaseBlock records, representing basic blocks in the binary
2067   // search tree.
2068   Value *SV = SI.getOperand(0);
2069
2070   // Push the initial CaseRec onto the worklist
2071   CaseRecVector WorkList;
2072   WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
2073
2074   while (!WorkList.empty()) {
2075     // Grab a record representing a case range to process off the worklist
2076     CaseRec CR = WorkList.back();
2077     WorkList.pop_back();
2078
2079     if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
2080       continue;
2081
2082     // If the range has few cases (two or less) emit a series of specific
2083     // tests.
2084     if (handleSmallSwitchRange(CR, WorkList, SV, Default))
2085       continue;
2086
2087     // If the switch has more than 5 blocks, and at least 40% dense, and the
2088     // target supports indirect branches, then emit a jump table rather than
2089     // lowering the switch to a binary tree of conditional branches.
2090     if (handleJTSwitchCase(CR, WorkList, SV, Default))
2091       continue;
2092
2093     // Emit binary tree. We need to pick a pivot, and push left and right ranges
2094     // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
2095     handleBTSplitSwitchCase(CR, WorkList, SV, Default);
2096   }
2097 }
2098
2099 void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
2100   // Update machine-CFG edges.
2101   for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
2102     CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
2103
2104   SDValue Res = DAG.getNode(ISD::BRIND, getCurDebugLoc(),
2105                             MVT::Other, getControlRoot(),
2106                             getValue(I.getAddress()));
2107   DAG.setRoot(Res);
2108
2109   if (DisableScheduling)
2110     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2111 }
2112
2113 void SelectionDAGBuilder::visitFSub(User &I) {
2114   // -0.0 - X --> fneg
2115   const Type *Ty = I.getType();
2116   if (isa<VectorType>(Ty)) {
2117     if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
2118       const VectorType *DestTy = cast<VectorType>(I.getType());
2119       const Type *ElTy = DestTy->getElementType();
2120       unsigned VL = DestTy->getNumElements();
2121       std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
2122       Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
2123       if (CV == CNZ) {
2124         SDValue Op2 = getValue(I.getOperand(1));
2125         SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2126                                   Op2.getValueType(), Op2);
2127         setValue(&I, Res);
2128
2129         if (DisableScheduling)
2130           DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2131
2132         return;
2133       }
2134     }
2135   }
2136
2137   if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
2138     if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
2139       SDValue Op2 = getValue(I.getOperand(1));
2140       SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2141                                 Op2.getValueType(), Op2);
2142       setValue(&I, Res);
2143
2144       if (DisableScheduling)
2145         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2146
2147       return;
2148     }
2149
2150   visitBinary(I, ISD::FSUB);
2151 }
2152
2153 void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
2154   SDValue Op1 = getValue(I.getOperand(0));
2155   SDValue Op2 = getValue(I.getOperand(1));
2156   SDValue Res = DAG.getNode(OpCode, getCurDebugLoc(),
2157                             Op1.getValueType(), Op1, Op2);
2158   setValue(&I, Res);
2159
2160   if (DisableScheduling)
2161     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2162 }
2163
2164 void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
2165   SDValue Op1 = getValue(I.getOperand(0));
2166   SDValue Op2 = getValue(I.getOperand(1));
2167   if (!isa<VectorType>(I.getType()) &&
2168       Op2.getValueType() != TLI.getShiftAmountTy()) {
2169     // If the operand is smaller than the shift count type, promote it.
2170     EVT PTy = TLI.getPointerTy();
2171     EVT STy = TLI.getShiftAmountTy();
2172     if (STy.bitsGT(Op2.getValueType()))
2173       Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2174                         TLI.getShiftAmountTy(), Op2);
2175     // If the operand is larger than the shift count type but the shift
2176     // count type has enough bits to represent any shift value, truncate
2177     // it now. This is a common case and it exposes the truncate to
2178     // optimization early.
2179     else if (STy.getSizeInBits() >=
2180              Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
2181       Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2182                         TLI.getShiftAmountTy(), Op2);
2183     // Otherwise we'll need to temporarily settle for some other
2184     // convenient type; type legalization will make adjustments as
2185     // needed.
2186     else if (PTy.bitsLT(Op2.getValueType()))
2187       Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2188                         TLI.getPointerTy(), Op2);
2189     else if (PTy.bitsGT(Op2.getValueType()))
2190       Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2191                         TLI.getPointerTy(), Op2);
2192   }
2193
2194   SDValue Res = DAG.getNode(Opcode, getCurDebugLoc(),
2195                             Op1.getValueType(), Op1, Op2);
2196   setValue(&I, Res);
2197
2198   if (DisableScheduling) {
2199     DAG.AssignOrdering(Op1.getNode(), SDNodeOrder);
2200     DAG.AssignOrdering(Op2.getNode(), SDNodeOrder);
2201     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2202   }
2203 }
2204
2205 void SelectionDAGBuilder::visitICmp(User &I) {
2206   ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2207   if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2208     predicate = IC->getPredicate();
2209   else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2210     predicate = ICmpInst::Predicate(IC->getPredicate());
2211   SDValue Op1 = getValue(I.getOperand(0));
2212   SDValue Op2 = getValue(I.getOperand(1));
2213   ISD::CondCode Opcode = getICmpCondCode(predicate);
2214
2215   EVT DestVT = TLI.getValueType(I.getType());
2216   SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode);
2217   setValue(&I, Res);
2218
2219   if (DisableScheduling)
2220     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2221 }
2222
2223 void SelectionDAGBuilder::visitFCmp(User &I) {
2224   FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2225   if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2226     predicate = FC->getPredicate();
2227   else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2228     predicate = FCmpInst::Predicate(FC->getPredicate());
2229   SDValue Op1 = getValue(I.getOperand(0));
2230   SDValue Op2 = getValue(I.getOperand(1));
2231   ISD::CondCode Condition = getFCmpCondCode(predicate);
2232   EVT DestVT = TLI.getValueType(I.getType());
2233   SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition);
2234   setValue(&I, Res);
2235
2236   if (DisableScheduling)
2237     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2238 }
2239
2240 void SelectionDAGBuilder::visitSelect(User &I) {
2241   SmallVector<EVT, 4> ValueVTs;
2242   ComputeValueVTs(TLI, I.getType(), ValueVTs);
2243   unsigned NumValues = ValueVTs.size();
2244   if (NumValues == 0) return;
2245
2246   SmallVector<SDValue, 4> Values(NumValues);
2247   SDValue Cond     = getValue(I.getOperand(0));
2248   SDValue TrueVal  = getValue(I.getOperand(1));
2249   SDValue FalseVal = getValue(I.getOperand(2));
2250
2251   for (unsigned i = 0; i != NumValues; ++i) {
2252     Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
2253                             TrueVal.getNode()->getValueType(i), Cond,
2254                             SDValue(TrueVal.getNode(),
2255                                     TrueVal.getResNo() + i),
2256                             SDValue(FalseVal.getNode(),
2257                                     FalseVal.getResNo() + i));
2258
2259     if (DisableScheduling)
2260       DAG.AssignOrdering(Values[i].getNode(), SDNodeOrder);
2261   }
2262
2263   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2264                             DAG.getVTList(&ValueVTs[0], NumValues),
2265                             &Values[0], NumValues);
2266   setValue(&I, Res);
2267
2268   if (DisableScheduling)
2269     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2270 }
2271
2272 void SelectionDAGBuilder::visitTrunc(User &I) {
2273   // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2274   SDValue N = getValue(I.getOperand(0));
2275   EVT DestVT = TLI.getValueType(I.getType());
2276   SDValue Res = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N);
2277   setValue(&I, Res);
2278
2279   if (DisableScheduling)
2280     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2281 }
2282
2283 void SelectionDAGBuilder::visitZExt(User &I) {
2284   // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2285   // ZExt also can't be a cast to bool for same reason. So, nothing much to do
2286   SDValue N = getValue(I.getOperand(0));
2287   EVT DestVT = TLI.getValueType(I.getType());
2288   SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N);
2289   setValue(&I, Res);
2290
2291   if (DisableScheduling)
2292     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2293 }
2294
2295 void SelectionDAGBuilder::visitSExt(User &I) {
2296   // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2297   // SExt also can't be a cast to bool for same reason. So, nothing much to do
2298   SDValue N = getValue(I.getOperand(0));
2299   EVT DestVT = TLI.getValueType(I.getType());
2300   SDValue Res = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N);
2301   setValue(&I, Res);
2302
2303   if (DisableScheduling)
2304     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2305 }
2306
2307 void SelectionDAGBuilder::visitFPTrunc(User &I) {
2308   // FPTrunc is never a no-op cast, no need to check
2309   SDValue N = getValue(I.getOperand(0));
2310   EVT DestVT = TLI.getValueType(I.getType());
2311   SDValue Res = DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
2312                             DestVT, N, DAG.getIntPtrConstant(0));
2313   setValue(&I, Res);
2314
2315   if (DisableScheduling)
2316     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2317 }
2318
2319 void SelectionDAGBuilder::visitFPExt(User &I){
2320   // FPTrunc is never a no-op cast, no need to check
2321   SDValue N = getValue(I.getOperand(0));
2322   EVT DestVT = TLI.getValueType(I.getType());
2323   SDValue Res = DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N);
2324   setValue(&I, Res);
2325
2326   if (DisableScheduling)
2327     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2328 }
2329
2330 void SelectionDAGBuilder::visitFPToUI(User &I) {
2331   // FPToUI is never a no-op cast, no need to check
2332   SDValue N = getValue(I.getOperand(0));
2333   EVT DestVT = TLI.getValueType(I.getType());
2334   SDValue Res = DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N);
2335   setValue(&I, Res);
2336
2337   if (DisableScheduling)
2338     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2339 }
2340
2341 void SelectionDAGBuilder::visitFPToSI(User &I) {
2342   // FPToSI is never a no-op cast, no need to check
2343   SDValue N = getValue(I.getOperand(0));
2344   EVT DestVT = TLI.getValueType(I.getType());
2345   SDValue Res = DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N);
2346   setValue(&I, Res);
2347
2348   if (DisableScheduling)
2349     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2350 }
2351
2352 void SelectionDAGBuilder::visitUIToFP(User &I) {
2353   // UIToFP is never a no-op cast, no need to check
2354   SDValue N = getValue(I.getOperand(0));
2355   EVT DestVT = TLI.getValueType(I.getType());
2356   SDValue Res = DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N);
2357   setValue(&I, Res);
2358
2359   if (DisableScheduling)
2360     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2361 }
2362
2363 void SelectionDAGBuilder::visitSIToFP(User &I){
2364   // SIToFP is never a no-op cast, no need to check
2365   SDValue N = getValue(I.getOperand(0));
2366   EVT DestVT = TLI.getValueType(I.getType());
2367   SDValue Res = DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N);
2368   setValue(&I, Res);
2369
2370   if (DisableScheduling)
2371     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2372 }
2373
2374 void SelectionDAGBuilder::visitPtrToInt(User &I) {
2375   // What to do depends on the size of the integer and the size of the pointer.
2376   // We can either truncate, zero extend, or no-op, accordingly.
2377   SDValue N = getValue(I.getOperand(0));
2378   EVT SrcVT = N.getValueType();
2379   EVT DestVT = TLI.getValueType(I.getType());
2380   SDValue Res = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT);
2381   setValue(&I, Res);
2382
2383   if (DisableScheduling)
2384     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2385 }
2386
2387 void SelectionDAGBuilder::visitIntToPtr(User &I) {
2388   // What to do depends on the size of the integer and the size of the pointer.
2389   // We can either truncate, zero extend, or no-op, accordingly.
2390   SDValue N = getValue(I.getOperand(0));
2391   EVT SrcVT = N.getValueType();
2392   EVT DestVT = TLI.getValueType(I.getType());
2393   SDValue Res = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT);
2394   setValue(&I, Res);
2395
2396   if (DisableScheduling)
2397     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2398 }
2399
2400 void SelectionDAGBuilder::visitBitCast(User &I) {
2401   SDValue N = getValue(I.getOperand(0));
2402   EVT DestVT = TLI.getValueType(I.getType());
2403
2404   // BitCast assures us that source and destination are the same size so this is
2405   // either a BIT_CONVERT or a no-op.
2406   if (DestVT != N.getValueType()) {
2407     SDValue Res = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
2408                               DestVT, N); // convert types.
2409     setValue(&I, Res);
2410
2411     if (DisableScheduling)
2412       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2413   } else {
2414     setValue(&I, N);            // noop cast.
2415   }
2416 }
2417
2418 void SelectionDAGBuilder::visitInsertElement(User &I) {
2419   SDValue InVec = getValue(I.getOperand(0));
2420   SDValue InVal = getValue(I.getOperand(1));
2421   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2422                               TLI.getPointerTy(),
2423                               getValue(I.getOperand(2)));
2424   SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
2425                             TLI.getValueType(I.getType()),
2426                             InVec, InVal, InIdx);
2427   setValue(&I, Res);
2428
2429   if (DisableScheduling) {
2430     DAG.AssignOrdering(InIdx.getNode(), SDNodeOrder);
2431     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2432   }
2433 }
2434
2435 void SelectionDAGBuilder::visitExtractElement(User &I) {
2436   SDValue InVec = getValue(I.getOperand(0));
2437   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2438                               TLI.getPointerTy(),
2439                               getValue(I.getOperand(1)));
2440   SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2441                             TLI.getValueType(I.getType()), InVec, InIdx);
2442   setValue(&I, Res);
2443
2444   if (DisableScheduling) {
2445     DAG.AssignOrdering(InIdx.getNode(), SDNodeOrder);
2446     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2447   }
2448 }
2449
2450
2451 // Utility for visitShuffleVector - Returns true if the mask is mask starting
2452 // from SIndx and increasing to the element length (undefs are allowed).
2453 static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
2454   unsigned MaskNumElts = Mask.size();
2455   for (unsigned i = 0; i != MaskNumElts; ++i)
2456     if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
2457       return false;
2458   return true;
2459 }
2460
2461 void SelectionDAGBuilder::visitShuffleVector(User &I) {
2462   SmallVector<int, 8> Mask;
2463   SDValue Src1 = getValue(I.getOperand(0));
2464   SDValue Src2 = getValue(I.getOperand(1));
2465
2466   // Convert the ConstantVector mask operand into an array of ints, with -1
2467   // representing undef values.
2468   SmallVector<Constant*, 8> MaskElts;
2469   cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(),
2470                                                      MaskElts);
2471   unsigned MaskNumElts = MaskElts.size();
2472   for (unsigned i = 0; i != MaskNumElts; ++i) {
2473     if (isa<UndefValue>(MaskElts[i]))
2474       Mask.push_back(-1);
2475     else
2476       Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
2477   }
2478
2479   EVT VT = TLI.getValueType(I.getType());
2480   EVT SrcVT = Src1.getValueType();
2481   unsigned SrcNumElts = SrcVT.getVectorNumElements();
2482
2483   if (SrcNumElts == MaskNumElts) {
2484     SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2485                                        &Mask[0]);
2486     setValue(&I, Res);
2487
2488     if (DisableScheduling)
2489       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2490
2491     return;
2492   }
2493
2494   // Normalize the shuffle vector since mask and vector length don't match.
2495   if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
2496     // Mask is longer than the source vectors and is a multiple of the source
2497     // vectors.  We can use concatenate vector to make the mask and vectors
2498     // lengths match.
2499     if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
2500       // The shuffle is concatenating two vectors together.
2501       SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
2502                                 VT, Src1, Src2);
2503       setValue(&I, Res);
2504
2505       if (DisableScheduling)
2506         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2507
2508       return;
2509     }
2510
2511     // Pad both vectors with undefs to make them the same length as the mask.
2512     unsigned NumConcat = MaskNumElts / SrcNumElts;
2513     bool Src1U = Src1.getOpcode() == ISD::UNDEF;
2514     bool Src2U = Src2.getOpcode() == ISD::UNDEF;
2515     SDValue UndefVal = DAG.getUNDEF(SrcVT);
2516
2517     SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
2518     SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
2519     MOps1[0] = Src1;
2520     MOps2[0] = Src2;
2521
2522     Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2523                                                   getCurDebugLoc(), VT,
2524                                                   &MOps1[0], NumConcat);
2525     Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2526                                                   getCurDebugLoc(), VT,
2527                                                   &MOps2[0], NumConcat);
2528
2529     // Readjust mask for new input vector length.
2530     SmallVector<int, 8> MappedOps;
2531     for (unsigned i = 0; i != MaskNumElts; ++i) {
2532       int Idx = Mask[i];
2533       if (Idx < (int)SrcNumElts)
2534         MappedOps.push_back(Idx);
2535       else
2536         MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
2537     }
2538
2539     SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2540                                        &MappedOps[0]);
2541     setValue(&I, Res);
2542
2543     if (DisableScheduling) {
2544       DAG.AssignOrdering(Src1.getNode(), SDNodeOrder);
2545       DAG.AssignOrdering(Src2.getNode(), SDNodeOrder);
2546       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2547     }
2548
2549     return;
2550   }
2551
2552   if (SrcNumElts > MaskNumElts) {
2553     // Analyze the access pattern of the vector to see if we can extract
2554     // two subvectors and do the shuffle. The analysis is done by calculating
2555     // the range of elements the mask access on both vectors.
2556     int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
2557     int MaxRange[2] = {-1, -1};
2558
2559     for (unsigned i = 0; i != MaskNumElts; ++i) {
2560       int Idx = Mask[i];
2561       int Input = 0;
2562       if (Idx < 0)
2563         continue;
2564
2565       if (Idx >= (int)SrcNumElts) {
2566         Input = 1;
2567         Idx -= SrcNumElts;
2568       }
2569       if (Idx > MaxRange[Input])
2570         MaxRange[Input] = Idx;
2571       if (Idx < MinRange[Input])
2572         MinRange[Input] = Idx;
2573     }
2574
2575     // Check if the access is smaller than the vector size and can we find
2576     // a reasonable extract index.
2577     int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not Extract.
2578     int StartIdx[2];  // StartIdx to extract from
2579     for (int Input=0; Input < 2; ++Input) {
2580       if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
2581         RangeUse[Input] = 0; // Unused
2582         StartIdx[Input] = 0;
2583       } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
2584         // Fits within range but we should see if we can find a good
2585         // start index that is a multiple of the mask length.
2586         if (MaxRange[Input] < (int)MaskNumElts) {
2587           RangeUse[Input] = 1; // Extract from beginning of the vector
2588           StartIdx[Input] = 0;
2589         } else {
2590           StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2591           if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2592               StartIdx[Input] + MaskNumElts < SrcNumElts)
2593             RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2594         }
2595       }
2596     }
2597
2598     if (RangeUse[0] == 0 && RangeUse[1] == 0) {
2599       SDValue Res = DAG.getUNDEF(VT);
2600       setValue(&I, Res);  // Vectors are not used.
2601
2602       if (DisableScheduling)
2603         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2604
2605       return;
2606     }
2607     else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
2608       // Extract appropriate subvector and generate a vector shuffle
2609       for (int Input=0; Input < 2; ++Input) {
2610         SDValue &Src = Input == 0 ? Src1 : Src2;
2611         if (RangeUse[Input] == 0)
2612           Src = DAG.getUNDEF(VT);
2613         else
2614           Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
2615                             Src, DAG.getIntPtrConstant(StartIdx[Input]));
2616
2617         if (DisableScheduling)
2618           DAG.AssignOrdering(Src.getNode(), SDNodeOrder);
2619       }
2620
2621       // Calculate new mask.
2622       SmallVector<int, 8> MappedOps;
2623       for (unsigned i = 0; i != MaskNumElts; ++i) {
2624         int Idx = Mask[i];
2625         if (Idx < 0)
2626           MappedOps.push_back(Idx);
2627         else if (Idx < (int)SrcNumElts)
2628           MappedOps.push_back(Idx - StartIdx[0]);
2629         else
2630           MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
2631       }
2632
2633       SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2634                                          &MappedOps[0]);
2635       setValue(&I, Res);
2636
2637       if (DisableScheduling)
2638         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2639
2640       return;
2641     }
2642   }
2643
2644   // We can't use either concat vectors or extract subvectors so fall back to
2645   // replacing the shuffle with extract and build vector.
2646   // to insert and build vector.
2647   EVT EltVT = VT.getVectorElementType();
2648   EVT PtrVT = TLI.getPointerTy();
2649   SmallVector<SDValue,8> Ops;
2650   for (unsigned i = 0; i != MaskNumElts; ++i) {
2651     if (Mask[i] < 0) {
2652       Ops.push_back(DAG.getUNDEF(EltVT));
2653     } else {
2654       int Idx = Mask[i];
2655       SDValue Res;
2656
2657       if (Idx < (int)SrcNumElts)
2658         Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2659                           EltVT, Src1, DAG.getConstant(Idx, PtrVT));
2660       else
2661         Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2662                           EltVT, Src2,
2663                           DAG.getConstant(Idx - SrcNumElts, PtrVT));
2664
2665       Ops.push_back(Res);
2666
2667       if (DisableScheduling)
2668         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2669     }
2670   }
2671
2672   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
2673                             VT, &Ops[0], Ops.size());
2674   setValue(&I, Res);
2675
2676   if (DisableScheduling)
2677     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2678 }
2679
2680 void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
2681   const Value *Op0 = I.getOperand(0);
2682   const Value *Op1 = I.getOperand(1);
2683   const Type *AggTy = I.getType();
2684   const Type *ValTy = Op1->getType();
2685   bool IntoUndef = isa<UndefValue>(Op0);
2686   bool FromUndef = isa<UndefValue>(Op1);
2687
2688   unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2689                                             I.idx_begin(), I.idx_end());
2690
2691   SmallVector<EVT, 4> AggValueVTs;
2692   ComputeValueVTs(TLI, AggTy, AggValueVTs);
2693   SmallVector<EVT, 4> ValValueVTs;
2694   ComputeValueVTs(TLI, ValTy, ValValueVTs);
2695
2696   unsigned NumAggValues = AggValueVTs.size();
2697   unsigned NumValValues = ValValueVTs.size();
2698   SmallVector<SDValue, 4> Values(NumAggValues);
2699
2700   SDValue Agg = getValue(Op0);
2701   SDValue Val = getValue(Op1);
2702   unsigned i = 0;
2703   // Copy the beginning value(s) from the original aggregate.
2704   for (; i != LinearIndex; ++i)
2705     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2706                 SDValue(Agg.getNode(), Agg.getResNo() + i);
2707   // Copy values from the inserted value(s).
2708   for (; i != LinearIndex + NumValValues; ++i)
2709     Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2710                 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
2711   // Copy remaining value(s) from the original aggregate.
2712   for (; i != NumAggValues; ++i)
2713     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2714                 SDValue(Agg.getNode(), Agg.getResNo() + i);
2715
2716   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2717                             DAG.getVTList(&AggValueVTs[0], NumAggValues),
2718                             &Values[0], NumAggValues);
2719   setValue(&I, Res);
2720
2721   if (DisableScheduling)
2722     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2723 }
2724
2725 void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
2726   const Value *Op0 = I.getOperand(0);
2727   const Type *AggTy = Op0->getType();
2728   const Type *ValTy = I.getType();
2729   bool OutOfUndef = isa<UndefValue>(Op0);
2730
2731   unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2732                                             I.idx_begin(), I.idx_end());
2733
2734   SmallVector<EVT, 4> ValValueVTs;
2735   ComputeValueVTs(TLI, ValTy, ValValueVTs);
2736
2737   unsigned NumValValues = ValValueVTs.size();
2738   SmallVector<SDValue, 4> Values(NumValValues);
2739
2740   SDValue Agg = getValue(Op0);
2741   // Copy out the selected value(s).
2742   for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
2743     Values[i - LinearIndex] =
2744       OutOfUndef ?
2745         DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
2746         SDValue(Agg.getNode(), Agg.getResNo() + i);
2747
2748   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2749                             DAG.getVTList(&ValValueVTs[0], NumValValues),
2750                             &Values[0], NumValValues);
2751   setValue(&I, Res);
2752
2753   if (DisableScheduling)
2754     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2755 }
2756
2757 void SelectionDAGBuilder::visitGetElementPtr(User &I) {
2758   SDValue N = getValue(I.getOperand(0));
2759   const Type *Ty = I.getOperand(0)->getType();
2760
2761   for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
2762        OI != E; ++OI) {
2763     Value *Idx = *OI;
2764     if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
2765       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
2766       if (Field) {
2767         // N = N + Offset
2768         uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
2769         N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2770                         DAG.getIntPtrConstant(Offset));
2771
2772         if (DisableScheduling)
2773           DAG.AssignOrdering(N.getNode(), SDNodeOrder);
2774       }
2775
2776       Ty = StTy->getElementType(Field);
2777     } else {
2778       Ty = cast<SequentialType>(Ty)->getElementType();
2779
2780       // If this is a constant subscript, handle it quickly.
2781       if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
2782         if (CI->getZExtValue() == 0) continue;
2783         uint64_t Offs =
2784             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
2785         SDValue OffsVal;
2786         EVT PTy = TLI.getPointerTy();
2787         unsigned PtrBits = PTy.getSizeInBits();
2788         if (PtrBits < 64)
2789           OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2790                                 TLI.getPointerTy(),
2791                                 DAG.getConstant(Offs, MVT::i64));
2792         else
2793           OffsVal = DAG.getIntPtrConstant(Offs);
2794
2795         N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2796                         OffsVal);
2797
2798         if (DisableScheduling) {
2799           DAG.AssignOrdering(OffsVal.getNode(), SDNodeOrder);
2800           DAG.AssignOrdering(N.getNode(), SDNodeOrder);
2801         }
2802
2803         continue;
2804       }
2805
2806       // N = N + Idx * ElementSize;
2807       APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
2808                                 TD->getTypeAllocSize(Ty));
2809       SDValue IdxN = getValue(Idx);
2810
2811       // If the index is smaller or larger than intptr_t, truncate or extend
2812       // it.
2813       IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
2814
2815       // If this is a multiply by a power of two, turn it into a shl
2816       // immediately.  This is a very common case.
2817       if (ElementSize != 1) {
2818         if (ElementSize.isPowerOf2()) {
2819           unsigned Amt = ElementSize.logBase2();
2820           IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
2821                              N.getValueType(), IdxN,
2822                              DAG.getConstant(Amt, TLI.getPointerTy()));
2823         } else {
2824           SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
2825           IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
2826                              N.getValueType(), IdxN, Scale);
2827         }
2828
2829         if (DisableScheduling)
2830           DAG.AssignOrdering(IdxN.getNode(), SDNodeOrder);
2831       }
2832
2833       N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2834                       N.getValueType(), N, IdxN);
2835
2836       if (DisableScheduling)
2837         DAG.AssignOrdering(N.getNode(), SDNodeOrder);
2838     }
2839   }
2840
2841   setValue(&I, N);
2842 }
2843
2844 void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
2845   // If this is a fixed sized alloca in the entry block of the function,
2846   // allocate it statically on the stack.
2847   if (FuncInfo.StaticAllocaMap.count(&I))
2848     return;   // getValue will auto-populate this.
2849
2850   const Type *Ty = I.getAllocatedType();
2851   uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
2852   unsigned Align =
2853     std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
2854              I.getAlignment());
2855
2856   SDValue AllocSize = getValue(I.getArraySize());
2857
2858   AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
2859                           AllocSize,
2860                           DAG.getConstant(TySize, AllocSize.getValueType()));
2861
2862   if (DisableScheduling)
2863     DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder);
2864
2865   EVT IntPtr = TLI.getPointerTy();
2866   AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
2867
2868   if (DisableScheduling)
2869     DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder);
2870
2871   // Handle alignment.  If the requested alignment is less than or equal to
2872   // the stack alignment, ignore it.  If the size is greater than or equal to
2873   // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
2874   unsigned StackAlign =
2875     TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
2876   if (Align <= StackAlign)
2877     Align = 0;
2878
2879   // Round the size of the allocation up to the stack alignment size
2880   // by add SA-1 to the size.
2881   AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2882                           AllocSize.getValueType(), AllocSize,
2883                           DAG.getIntPtrConstant(StackAlign-1));
2884   if (DisableScheduling)
2885     DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder);
2886
2887   // Mask out the low bits for alignment purposes.
2888   AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
2889                           AllocSize.getValueType(), AllocSize,
2890                           DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
2891   if (DisableScheduling)
2892     DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder);
2893
2894   SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
2895   SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
2896   SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
2897                             VTs, Ops, 3);
2898   setValue(&I, DSA);
2899   DAG.setRoot(DSA.getValue(1));
2900
2901   if (DisableScheduling)
2902     DAG.AssignOrdering(DSA.getNode(), SDNodeOrder);
2903
2904   // Inform the Frame Information that we have just allocated a variable-sized
2905   // object.
2906   FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
2907 }
2908
2909 void SelectionDAGBuilder::visitLoad(LoadInst &I) {
2910   const Value *SV = I.getOperand(0);
2911   SDValue Ptr = getValue(SV);
2912
2913   const Type *Ty = I.getType();
2914   bool isVolatile = I.isVolatile();
2915   unsigned Alignment = I.getAlignment();
2916
2917   SmallVector<EVT, 4> ValueVTs;
2918   SmallVector<uint64_t, 4> Offsets;
2919   ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
2920   unsigned NumValues = ValueVTs.size();
2921   if (NumValues == 0)
2922     return;
2923
2924   SDValue Root;
2925   bool ConstantMemory = false;
2926   if (I.isVolatile())
2927     // Serialize volatile loads with other side effects.
2928     Root = getRoot();
2929   else if (AA->pointsToConstantMemory(SV)) {
2930     // Do not serialize (non-volatile) loads of constant memory with anything.
2931     Root = DAG.getEntryNode();
2932     ConstantMemory = true;
2933   } else {
2934     // Do not serialize non-volatile loads against each other.
2935     Root = DAG.getRoot();
2936   }
2937
2938   SmallVector<SDValue, 4> Values(NumValues);
2939   SmallVector<SDValue, 4> Chains(NumValues);
2940   EVT PtrVT = Ptr.getValueType();
2941   for (unsigned i = 0; i != NumValues; ++i) {
2942     SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2943                             PtrVT, Ptr,
2944                             DAG.getConstant(Offsets[i], PtrVT));
2945     SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
2946                             A, SV, Offsets[i], isVolatile, Alignment);
2947
2948     Values[i] = L;
2949     Chains[i] = L.getValue(1);
2950
2951     if (DisableScheduling) {
2952       DAG.AssignOrdering(A.getNode(), SDNodeOrder);
2953       DAG.AssignOrdering(L.getNode(), SDNodeOrder);
2954     }
2955   }
2956
2957   if (!ConstantMemory) {
2958     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2959                                 MVT::Other, &Chains[0], NumValues);
2960     if (isVolatile)
2961       DAG.setRoot(Chain);
2962     else
2963       PendingLoads.push_back(Chain);
2964
2965     if (DisableScheduling)
2966       DAG.AssignOrdering(Chain.getNode(), SDNodeOrder);
2967   }
2968
2969   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2970                             DAG.getVTList(&ValueVTs[0], NumValues),
2971                             &Values[0], NumValues);
2972   setValue(&I, Res);
2973
2974   if (DisableScheduling)
2975     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
2976 }
2977
2978 void SelectionDAGBuilder::visitStore(StoreInst &I) {
2979   Value *SrcV = I.getOperand(0);
2980   Value *PtrV = I.getOperand(1);
2981
2982   SmallVector<EVT, 4> ValueVTs;
2983   SmallVector<uint64_t, 4> Offsets;
2984   ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
2985   unsigned NumValues = ValueVTs.size();
2986   if (NumValues == 0)
2987     return;
2988
2989   // Get the lowered operands. Note that we do this after
2990   // checking if NumResults is zero, because with zero results
2991   // the operands won't have values in the map.
2992   SDValue Src = getValue(SrcV);
2993   SDValue Ptr = getValue(PtrV);
2994
2995   SDValue Root = getRoot();
2996   SmallVector<SDValue, 4> Chains(NumValues);
2997   EVT PtrVT = Ptr.getValueType();
2998   bool isVolatile = I.isVolatile();
2999   unsigned Alignment = I.getAlignment();
3000
3001   for (unsigned i = 0; i != NumValues; ++i) {
3002     SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
3003                               DAG.getConstant(Offsets[i], PtrVT));
3004     Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
3005                              SDValue(Src.getNode(), Src.getResNo() + i),
3006                              Add, PtrV, Offsets[i], isVolatile, Alignment);
3007
3008     if (DisableScheduling) {
3009       DAG.AssignOrdering(Add.getNode(), SDNodeOrder);
3010       DAG.AssignOrdering(Chains[i].getNode(), SDNodeOrder);
3011     }
3012   }
3013
3014   SDValue Res = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
3015                             MVT::Other, &Chains[0], NumValues);
3016   DAG.setRoot(Res);
3017
3018   if (DisableScheduling)
3019     DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
3020 }
3021
3022 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
3023 /// node.
3024 void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
3025                                                unsigned Intrinsic) {
3026   bool HasChain = !I.doesNotAccessMemory();
3027   bool OnlyLoad = HasChain && I.onlyReadsMemory();
3028
3029   // Build the operand list.
3030   SmallVector<SDValue, 8> Ops;
3031   if (HasChain) {  // If this intrinsic has side-effects, chainify it.
3032     if (OnlyLoad) {
3033       // We don't need to serialize loads against other loads.
3034       Ops.push_back(DAG.getRoot());
3035     } else {
3036       Ops.push_back(getRoot());
3037     }
3038   }
3039
3040   // Info is set by getTgtMemInstrinsic
3041   TargetLowering::IntrinsicInfo Info;
3042   bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
3043
3044   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
3045   if (!IsTgtIntrinsic)
3046     Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
3047
3048   // Add all operands of the call to the operand list.
3049   for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
3050     SDValue Op = getValue(I.getOperand(i));
3051     assert(TLI.isTypeLegal(Op.getValueType()) &&
3052            "Intrinsic uses a non-legal type?");
3053     Ops.push_back(Op);
3054   }
3055
3056   SmallVector<EVT, 4> ValueVTs;
3057   ComputeValueVTs(TLI, I.getType(), ValueVTs);
3058 #ifndef NDEBUG
3059   for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
3060     assert(TLI.isTypeLegal(ValueVTs[Val]) &&
3061            "Intrinsic uses a non-legal type?");
3062   }
3063 #endif // NDEBUG
3064
3065   if (HasChain)
3066     ValueVTs.push_back(MVT::Other);
3067
3068   SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
3069
3070   // Create the node.
3071   SDValue Result;
3072   if (IsTgtIntrinsic) {
3073     // This is target intrinsic that touches memory
3074     Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
3075                                      VTs, &Ops[0], Ops.size(),
3076                                      Info.memVT, Info.ptrVal, Info.offset,
3077                                      Info.align, Info.vol,
3078                                      Info.readMem, Info.writeMem);
3079   } else if (!HasChain) {
3080     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
3081                          VTs, &Ops[0], Ops.size());
3082   } else if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
3083     Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
3084                          VTs, &Ops[0], Ops.size());
3085   } else {
3086     Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
3087                          VTs, &Ops[0], Ops.size());
3088   }
3089
3090   if (DisableScheduling)
3091     DAG.AssignOrdering(Result.getNode(), SDNodeOrder);
3092
3093   if (HasChain) {
3094     SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
3095     if (OnlyLoad)
3096       PendingLoads.push_back(Chain);
3097     else
3098       DAG.setRoot(Chain);
3099   }
3100
3101   if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
3102     if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
3103       EVT VT = TLI.getValueType(PTy);
3104       Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
3105
3106       if (DisableScheduling)
3107         DAG.AssignOrdering(Result.getNode(), SDNodeOrder);
3108     }
3109
3110     setValue(&I, Result);
3111   }
3112 }
3113
3114 /// GetSignificand - Get the significand and build it into a floating-point
3115 /// number with exponent of 1:
3116 ///
3117 ///   Op = (Op & 0x007fffff) | 0x3f800000;
3118 ///
3119 /// where Op is the hexidecimal representation of floating point value.
3120 static SDValue
3121 GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
3122   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3123                            DAG.getConstant(0x007fffff, MVT::i32));
3124   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
3125                            DAG.getConstant(0x3f800000, MVT::i32));
3126   SDValue Res = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
3127
3128   if (DisableScheduling) {
3129     DAG.AssignOrdering(t1.getNode(), Order);
3130     DAG.AssignOrdering(t2.getNode(), Order);
3131     DAG.AssignOrdering(Res.getNode(), Order);
3132   }
3133
3134   return Res;
3135 }
3136
3137 /// GetExponent - Get the exponent:
3138 ///
3139 ///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
3140 ///
3141 /// where Op is the hexidecimal representation of floating point value.
3142 static SDValue
3143 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
3144             DebugLoc dl, unsigned Order) {
3145   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3146                            DAG.getConstant(0x7f800000, MVT::i32));
3147   SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
3148                            DAG.getConstant(23, TLI.getPointerTy()));
3149   SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
3150                            DAG.getConstant(127, MVT::i32));
3151   SDValue Res = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
3152
3153   if (DisableScheduling) {
3154     DAG.AssignOrdering(t0.getNode(), Order);
3155     DAG.AssignOrdering(t1.getNode(), Order);
3156     DAG.AssignOrdering(t2.getNode(), Order);
3157     DAG.AssignOrdering(Res.getNode(), Order);
3158   }
3159
3160   return Res;
3161 }
3162
3163 /// getF32Constant - Get 32-bit floating point constant.
3164 static SDValue
3165 getF32Constant(SelectionDAG &DAG, unsigned Flt) {
3166   return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
3167 }
3168
3169 /// Inlined utility function to implement binary input atomic intrinsics for
3170 /// visitIntrinsicCall: I is a call instruction
3171 ///                     Op is the associated NodeType for I
3172 const char *
3173 SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
3174   SDValue Root = getRoot();
3175   SDValue L =
3176     DAG.getAtomic(Op, getCurDebugLoc(),
3177                   getValue(I.getOperand(2)).getValueType().getSimpleVT(),
3178                   Root,
3179                   getValue(I.getOperand(1)),
3180                   getValue(I.getOperand(2)),
3181                   I.getOperand(1));
3182   setValue(&I, L);
3183   DAG.setRoot(L.getValue(1));
3184
3185   if (DisableScheduling)
3186     DAG.AssignOrdering(L.getNode(), SDNodeOrder);
3187
3188   return 0;
3189 }
3190
3191 // implVisitAluOverflow - Lower arithmetic overflow instrinsics.
3192 const char *
3193 SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
3194   SDValue Op1 = getValue(I.getOperand(1));
3195   SDValue Op2 = getValue(I.getOperand(2));
3196
3197   SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
3198   SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
3199
3200   setValue(&I, Result);
3201
3202   if (DisableScheduling)
3203     DAG.AssignOrdering(Result.getNode(), SDNodeOrder);
3204
3205   return 0;
3206 }
3207
3208 /// visitExp - Lower an exp intrinsic. Handles the special sequences for
3209 /// limited-precision mode.
3210 void
3211 SelectionDAGBuilder::visitExp(CallInst &I) {
3212   SDValue result;
3213   DebugLoc dl = getCurDebugLoc();
3214
3215   if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3216       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3217     SDValue Op = getValue(I.getOperand(1));
3218
3219     // Put the exponent in the right bit position for later addition to the
3220     // final result:
3221     //
3222     //   #define LOG2OFe 1.4426950f
3223     //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
3224     SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3225                              getF32Constant(DAG, 0x3fb8aa3b));
3226     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3227
3228     //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
3229     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3230     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3231
3232     if (DisableScheduling) {
3233       DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3234       DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder);
3235       DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3236       DAG.AssignOrdering(X.getNode(), SDNodeOrder);
3237     }
3238
3239     //   IntegerPartOfX <<= 23;
3240     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3241                                  DAG.getConstant(23, TLI.getPointerTy()));
3242
3243     if (DisableScheduling)
3244       DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder);
3245
3246     if (LimitFloatPrecision <= 6) {
3247       // For floating-point precision of 6:
3248       //
3249       //   TwoToFractionalPartOfX =
3250       //     0.997535578f +
3251       //       (0.735607626f + 0.252464424f * x) * x;
3252       //
3253       // error 0.0144103317, which is 6 bits
3254       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3255                                getF32Constant(DAG, 0x3e814304));
3256       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3257                                getF32Constant(DAG, 0x3f3c50c8));
3258       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3259       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3260                                getF32Constant(DAG, 0x3f7f5e7e));
3261       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
3262
3263       // Add the exponent into the result in integer domain.
3264       SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3265                                TwoToFracPartOfX, IntegerPartOfX);
3266
3267       result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
3268
3269       if (DisableScheduling) {
3270         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3271         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3272         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3273         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3274         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3275         DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder);
3276         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3277       }
3278     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3279       // For floating-point precision of 12:
3280       //
3281       //   TwoToFractionalPartOfX =
3282       //     0.999892986f +
3283       //       (0.696457318f +
3284       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3285       //
3286       // 0.000107046256 error, which is 13 to 14 bits
3287       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3288                                getF32Constant(DAG, 0x3da235e3));
3289       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3290                                getF32Constant(DAG, 0x3e65b8f3));
3291       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3292       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3293                                getF32Constant(DAG, 0x3f324b07));
3294       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3295       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3296                                getF32Constant(DAG, 0x3f7ff8fd));
3297       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
3298
3299       // Add the exponent into the result in integer domain.
3300       SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3301                                TwoToFracPartOfX, IntegerPartOfX);
3302
3303       result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
3304
3305       if (DisableScheduling) {
3306         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3307         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3308         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3309         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3310         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3311         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3312         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3313         DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder);
3314         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3315       }
3316     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3317       // For floating-point precision of 18:
3318       //
3319       //   TwoToFractionalPartOfX =
3320       //     0.999999982f +
3321       //       (0.693148872f +
3322       //         (0.240227044f +
3323       //           (0.554906021e-1f +
3324       //             (0.961591928e-2f +
3325       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3326       //
3327       // error 2.47208000*10^(-7), which is better than 18 bits
3328       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3329                                getF32Constant(DAG, 0x3924b03e));
3330       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3331                                getF32Constant(DAG, 0x3ab24b87));
3332       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3333       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3334                                getF32Constant(DAG, 0x3c1d8c17));
3335       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3336       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3337                                getF32Constant(DAG, 0x3d634a1d));
3338       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3339       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3340                                getF32Constant(DAG, 0x3e75fe14));
3341       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3342       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3343                                 getF32Constant(DAG, 0x3f317234));
3344       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3345       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3346                                 getF32Constant(DAG, 0x3f800000));
3347       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
3348                                              MVT::i32, t13);
3349
3350       // Add the exponent into the result in integer domain.
3351       SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3352                                 TwoToFracPartOfX, IntegerPartOfX);
3353
3354       result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
3355
3356       if (DisableScheduling) {
3357         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3358         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3359         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3360         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3361         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3362         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3363         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3364         DAG.AssignOrdering(t9.getNode(), SDNodeOrder);
3365         DAG.AssignOrdering(t10.getNode(), SDNodeOrder);
3366         DAG.AssignOrdering(t11.getNode(), SDNodeOrder);
3367         DAG.AssignOrdering(t12.getNode(), SDNodeOrder);
3368         DAG.AssignOrdering(t13.getNode(), SDNodeOrder);
3369         DAG.AssignOrdering(t14.getNode(), SDNodeOrder);
3370         DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder);
3371         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3372       }
3373     }
3374   } else {
3375     // No special expansion.
3376     result = DAG.getNode(ISD::FEXP, dl,
3377                          getValue(I.getOperand(1)).getValueType(),
3378                          getValue(I.getOperand(1)));
3379     if (DisableScheduling)
3380       DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3381   }
3382
3383   setValue(&I, result);
3384 }
3385
3386 /// visitLog - Lower a log intrinsic. Handles the special sequences for
3387 /// limited-precision mode.
3388 void
3389 SelectionDAGBuilder::visitLog(CallInst &I) {
3390   SDValue result;
3391   DebugLoc dl = getCurDebugLoc();
3392
3393   if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3394       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3395     SDValue Op = getValue(I.getOperand(1));
3396     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3397
3398     if (DisableScheduling)
3399       DAG.AssignOrdering(Op1.getNode(), SDNodeOrder);
3400
3401     // Scale the exponent by log(2) [0.69314718f].
3402     SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
3403     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3404                                         getF32Constant(DAG, 0x3f317218));
3405
3406     if (DisableScheduling)
3407       DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder);
3408
3409     // Get the significand and build it into a floating-point number with
3410     // exponent of 1.
3411     SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
3412
3413     if (LimitFloatPrecision <= 6) {
3414       // For floating-point precision of 6:
3415       //
3416       //   LogofMantissa =
3417       //     -1.1609546f +
3418       //       (1.4034025f - 0.23903021f * x) * x;
3419       //
3420       // error 0.0034276066, which is better than 8 bits
3421       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3422                                getF32Constant(DAG, 0xbe74c456));
3423       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3424                                getF32Constant(DAG, 0x3fb3a2b1));
3425       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3426       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3427                                           getF32Constant(DAG, 0x3f949a29));
3428
3429       result = DAG.getNode(ISD::FADD, dl,
3430                            MVT::f32, LogOfExponent, LogOfMantissa);
3431
3432       if (DisableScheduling) {
3433         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3434         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3435         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3436         DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder);
3437         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3438       }
3439     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3440       // For floating-point precision of 12:
3441       //
3442       //   LogOfMantissa =
3443       //     -1.7417939f +
3444       //       (2.8212026f +
3445       //         (-1.4699568f +
3446       //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
3447       //
3448       // error 0.000061011436, which is 14 bits
3449       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3450                                getF32Constant(DAG, 0xbd67b6d6));
3451       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3452                                getF32Constant(DAG, 0x3ee4f4b8));
3453       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3454       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3455                                getF32Constant(DAG, 0x3fbc278b));
3456       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3457       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3458                                getF32Constant(DAG, 0x40348e95));
3459       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3460       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3461                                           getF32Constant(DAG, 0x3fdef31a));
3462
3463       result = DAG.getNode(ISD::FADD, dl,
3464                            MVT::f32, LogOfExponent, LogOfMantissa);
3465
3466       if (DisableScheduling) {
3467         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3468         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3469         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3470         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3471         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3472         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3473         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3474         DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder);
3475         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3476       }
3477     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3478       // For floating-point precision of 18:
3479       //
3480       //   LogOfMantissa =
3481       //     -2.1072184f +
3482       //       (4.2372794f +
3483       //         (-3.7029485f +
3484       //           (2.2781945f +
3485       //             (-0.87823314f +
3486       //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
3487       //
3488       // error 0.0000023660568, which is better than 18 bits
3489       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3490                                getF32Constant(DAG, 0xbc91e5ac));
3491       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3492                                getF32Constant(DAG, 0x3e4350aa));
3493       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3494       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3495                                getF32Constant(DAG, 0x3f60d3e3));
3496       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3497       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3498                                getF32Constant(DAG, 0x4011cdf0));
3499       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3500       SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3501                                getF32Constant(DAG, 0x406cfd1c));
3502       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3503       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3504                                getF32Constant(DAG, 0x408797cb));
3505       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3506       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3507                                           getF32Constant(DAG, 0x4006dcab));
3508
3509       result = DAG.getNode(ISD::FADD, dl,
3510                            MVT::f32, LogOfExponent, LogOfMantissa);
3511
3512       if (DisableScheduling) {
3513         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3514         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3515         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3516         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3517         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3518         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3519         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3520         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3521         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3522         DAG.AssignOrdering(t9.getNode(), SDNodeOrder);
3523         DAG.AssignOrdering(t10.getNode(), SDNodeOrder);
3524         DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder);
3525         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3526       }
3527     }
3528   } else {
3529     // No special expansion.
3530     result = DAG.getNode(ISD::FLOG, dl,
3531                          getValue(I.getOperand(1)).getValueType(),
3532                          getValue(I.getOperand(1)));
3533
3534     if (DisableScheduling)
3535       DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3536   }
3537
3538   setValue(&I, result);
3539 }
3540
3541 /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
3542 /// limited-precision mode.
3543 void
3544 SelectionDAGBuilder::visitLog2(CallInst &I) {
3545   SDValue result;
3546   DebugLoc dl = getCurDebugLoc();
3547
3548   if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3549       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3550     SDValue Op = getValue(I.getOperand(1));
3551     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3552
3553     if (DisableScheduling)
3554       DAG.AssignOrdering(Op1.getNode(), SDNodeOrder);
3555
3556     // Get the exponent.
3557     SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
3558
3559     if (DisableScheduling)
3560       DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder);
3561
3562     // Get the significand and build it into a floating-point number with
3563     // exponent of 1.
3564     SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
3565
3566     // Different possible minimax approximations of significand in
3567     // floating-point for various degrees of accuracy over [1,2].
3568     if (LimitFloatPrecision <= 6) {
3569       // For floating-point precision of 6:
3570       //
3571       //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
3572       //
3573       // error 0.0049451742, which is more than 7 bits
3574       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3575                                getF32Constant(DAG, 0xbeb08fe0));
3576       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3577                                getF32Constant(DAG, 0x40019463));
3578       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3579       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3580                                            getF32Constant(DAG, 0x3fd6633d));
3581
3582       result = DAG.getNode(ISD::FADD, dl,
3583                            MVT::f32, LogOfExponent, Log2ofMantissa);
3584
3585       if (DisableScheduling) {
3586         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3587         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3588         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3589         DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder);
3590         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3591       }
3592     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3593       // For floating-point precision of 12:
3594       //
3595       //   Log2ofMantissa =
3596       //     -2.51285454f +
3597       //       (4.07009056f +
3598       //         (-2.12067489f +
3599       //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
3600       //
3601       // error 0.0000876136000, which is better than 13 bits
3602       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3603                                getF32Constant(DAG, 0xbda7262e));
3604       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3605                                getF32Constant(DAG, 0x3f25280b));
3606       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3607       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3608                                getF32Constant(DAG, 0x4007b923));
3609       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3610       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3611                                getF32Constant(DAG, 0x40823e2f));
3612       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3613       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3614                                            getF32Constant(DAG, 0x4020d29c));
3615
3616       result = DAG.getNode(ISD::FADD, dl,
3617                            MVT::f32, LogOfExponent, Log2ofMantissa);
3618
3619       if (DisableScheduling) {
3620         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3621         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3622         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3623         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3624         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3625         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3626         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3627         DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder);
3628         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3629       }
3630     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3631       // For floating-point precision of 18:
3632       //
3633       //   Log2ofMantissa =
3634       //     -3.0400495f +
3635       //       (6.1129976f +
3636       //         (-5.3420409f +
3637       //           (3.2865683f +
3638       //             (-1.2669343f +
3639       //               (0.27515199f -
3640       //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
3641       //
3642       // error 0.0000018516, which is better than 18 bits
3643       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3644                                getF32Constant(DAG, 0xbcd2769e));
3645       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3646                                getF32Constant(DAG, 0x3e8ce0b9));
3647       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3648       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3649                                getF32Constant(DAG, 0x3fa22ae7));
3650       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3651       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3652                                getF32Constant(DAG, 0x40525723));
3653       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3654       SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3655                                getF32Constant(DAG, 0x40aaf200));
3656       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3657       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3658                                getF32Constant(DAG, 0x40c39dad));
3659       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3660       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3661                                            getF32Constant(DAG, 0x4042902c));
3662
3663       result = DAG.getNode(ISD::FADD, dl,
3664                            MVT::f32, LogOfExponent, Log2ofMantissa);
3665
3666       if (DisableScheduling) {
3667         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3668         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3669         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3670         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3671         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3672         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3673         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3674         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3675         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3676         DAG.AssignOrdering(t9.getNode(), SDNodeOrder);
3677         DAG.AssignOrdering(t10.getNode(), SDNodeOrder);
3678         DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder);
3679         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3680       }
3681     }
3682   } else {
3683     // No special expansion.
3684     result = DAG.getNode(ISD::FLOG2, dl,
3685                          getValue(I.getOperand(1)).getValueType(),
3686                          getValue(I.getOperand(1)));
3687
3688     if (DisableScheduling)
3689       DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3690   }
3691
3692   setValue(&I, result);
3693 }
3694
3695 /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
3696 /// limited-precision mode.
3697 void
3698 SelectionDAGBuilder::visitLog10(CallInst &I) {
3699   SDValue result;
3700   DebugLoc dl = getCurDebugLoc();
3701
3702   if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3703       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3704     SDValue Op = getValue(I.getOperand(1));
3705     SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3706
3707     if (DisableScheduling)
3708       DAG.AssignOrdering(Op1.getNode(), SDNodeOrder);
3709
3710     // Scale the exponent by log10(2) [0.30102999f].
3711     SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
3712     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3713                                         getF32Constant(DAG, 0x3e9a209a));
3714
3715     if (DisableScheduling)
3716       DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder);
3717
3718     // Get the significand and build it into a floating-point number with
3719     // exponent of 1.
3720     SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
3721
3722     if (LimitFloatPrecision <= 6) {
3723       // For floating-point precision of 6:
3724       //
3725       //   Log10ofMantissa =
3726       //     -0.50419619f +
3727       //       (0.60948995f - 0.10380950f * x) * x;
3728       //
3729       // error 0.0014886165, which is 6 bits
3730       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3731                                getF32Constant(DAG, 0xbdd49a13));
3732       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3733                                getF32Constant(DAG, 0x3f1c0789));
3734       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3735       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3736                                             getF32Constant(DAG, 0x3f011300));
3737
3738       result = DAG.getNode(ISD::FADD, dl,
3739                            MVT::f32, LogOfExponent, Log10ofMantissa);
3740
3741       if (DisableScheduling) {
3742         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3743         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3744         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3745         DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder);
3746         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3747       }
3748     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3749       // For floating-point precision of 12:
3750       //
3751       //   Log10ofMantissa =
3752       //     -0.64831180f +
3753       //       (0.91751397f +
3754       //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
3755       //
3756       // error 0.00019228036, which is better than 12 bits
3757       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3758                                getF32Constant(DAG, 0x3d431f31));
3759       SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3760                                getF32Constant(DAG, 0x3ea21fb2));
3761       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3762       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3763                                getF32Constant(DAG, 0x3f6ae232));
3764       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3765       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3766                                             getF32Constant(DAG, 0x3f25f7c3));
3767
3768       result = DAG.getNode(ISD::FADD, dl,
3769                            MVT::f32, LogOfExponent, Log10ofMantissa);
3770
3771       if (DisableScheduling) {
3772         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3773         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3774         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3775         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3776         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3777         DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder);
3778         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3779       }
3780     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3781       // For floating-point precision of 18:
3782       //
3783       //   Log10ofMantissa =
3784       //     -0.84299375f +
3785       //       (1.5327582f +
3786       //         (-1.0688956f +
3787       //           (0.49102474f +
3788       //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
3789       //
3790       // error 0.0000037995730, which is better than 18 bits
3791       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3792                                getF32Constant(DAG, 0x3c5d51ce));
3793       SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3794                                getF32Constant(DAG, 0x3e00685a));
3795       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3796       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3797                                getF32Constant(DAG, 0x3efb6798));
3798       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3799       SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3800                                getF32Constant(DAG, 0x3f88d192));
3801       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3802       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3803                                getF32Constant(DAG, 0x3fc4316c));
3804       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3805       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
3806                                             getF32Constant(DAG, 0x3f57ce70));
3807
3808       result = DAG.getNode(ISD::FADD, dl,
3809                            MVT::f32, LogOfExponent, Log10ofMantissa);
3810
3811       if (DisableScheduling) {
3812         DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
3813         DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3814         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3815         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3816         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3817         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3818         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3819         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3820         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3821         DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder);
3822         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3823       }
3824     }
3825   } else {
3826     // No special expansion.
3827     result = DAG.getNode(ISD::FLOG10, dl,
3828                          getValue(I.getOperand(1)).getValueType(),
3829                          getValue(I.getOperand(1)));
3830
3831     if (DisableScheduling)
3832       DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3833   }
3834
3835   setValue(&I, result);
3836 }
3837
3838 /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
3839 /// limited-precision mode.
3840 void
3841 SelectionDAGBuilder::visitExp2(CallInst &I) {
3842   SDValue result;
3843   DebugLoc dl = getCurDebugLoc();
3844
3845   if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3846       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3847     SDValue Op = getValue(I.getOperand(1));
3848
3849     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
3850
3851     if (DisableScheduling)
3852       DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder);
3853
3854     //   FractionalPartOfX = x - (float)IntegerPartOfX;
3855     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3856     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
3857
3858     //   IntegerPartOfX <<= 23;
3859     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3860                                  DAG.getConstant(23, TLI.getPointerTy()));
3861
3862     if (DisableScheduling) {
3863       DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
3864       DAG.AssignOrdering(X.getNode(), SDNodeOrder);
3865       DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder);
3866     }
3867
3868     if (LimitFloatPrecision <= 6) {
3869       // For floating-point precision of 6:
3870       //
3871       //   TwoToFractionalPartOfX =
3872       //     0.997535578f +
3873       //       (0.735607626f + 0.252464424f * x) * x;
3874       //
3875       // error 0.0144103317, which is 6 bits
3876       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3877                                getF32Constant(DAG, 0x3e814304));
3878       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3879                                getF32Constant(DAG, 0x3f3c50c8));
3880       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3881       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3882                                getF32Constant(DAG, 0x3f7f5e7e));
3883       SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3884       SDValue TwoToFractionalPartOfX =
3885         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3886
3887       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3888                            MVT::f32, TwoToFractionalPartOfX);
3889
3890       if (DisableScheduling) {
3891         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3892         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3893         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3894         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3895         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3896         DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder);
3897         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3898       }
3899     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3900       // For floating-point precision of 12:
3901       //
3902       //   TwoToFractionalPartOfX =
3903       //     0.999892986f +
3904       //       (0.696457318f +
3905       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3906       //
3907       // error 0.000107046256, which is 13 to 14 bits
3908       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3909                                getF32Constant(DAG, 0x3da235e3));
3910       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3911                                getF32Constant(DAG, 0x3e65b8f3));
3912       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3913       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3914                                getF32Constant(DAG, 0x3f324b07));
3915       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3916       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3917                                getF32Constant(DAG, 0x3f7ff8fd));
3918       SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3919       SDValue TwoToFractionalPartOfX =
3920         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3921
3922       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3923                            MVT::f32, TwoToFractionalPartOfX);
3924
3925       if (DisableScheduling) {
3926         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3927         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3928         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3929         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3930         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3931         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3932         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3933         DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder);
3934         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3935       }
3936     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3937       // For floating-point precision of 18:
3938       //
3939       //   TwoToFractionalPartOfX =
3940       //     0.999999982f +
3941       //       (0.693148872f +
3942       //         (0.240227044f +
3943       //           (0.554906021e-1f +
3944       //             (0.961591928e-2f +
3945       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3946       // error 2.47208000*10^(-7), which is better than 18 bits
3947       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3948                                getF32Constant(DAG, 0x3924b03e));
3949       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3950                                getF32Constant(DAG, 0x3ab24b87));
3951       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3952       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3953                                getF32Constant(DAG, 0x3c1d8c17));
3954       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3955       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3956                                getF32Constant(DAG, 0x3d634a1d));
3957       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3958       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3959                                getF32Constant(DAG, 0x3e75fe14));
3960       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3961       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3962                                 getF32Constant(DAG, 0x3f317234));
3963       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3964       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3965                                 getF32Constant(DAG, 0x3f800000));
3966       SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3967       SDValue TwoToFractionalPartOfX =
3968         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3969
3970       result = DAG.getNode(ISD::BIT_CONVERT, dl,
3971                            MVT::f32, TwoToFractionalPartOfX);
3972
3973       if (DisableScheduling) {
3974         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
3975         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
3976         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
3977         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
3978         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
3979         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
3980         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
3981         DAG.AssignOrdering(t9.getNode(), SDNodeOrder);
3982         DAG.AssignOrdering(t10.getNode(), SDNodeOrder);
3983         DAG.AssignOrdering(t11.getNode(), SDNodeOrder);
3984         DAG.AssignOrdering(t12.getNode(), SDNodeOrder);
3985         DAG.AssignOrdering(t13.getNode(), SDNodeOrder);
3986         DAG.AssignOrdering(t14.getNode(), SDNodeOrder);
3987         DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder);
3988         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3989       }
3990     }
3991   } else {
3992     // No special expansion.
3993     result = DAG.getNode(ISD::FEXP2, dl,
3994                          getValue(I.getOperand(1)).getValueType(),
3995                          getValue(I.getOperand(1)));
3996
3997     if (DisableScheduling)
3998       DAG.AssignOrdering(result.getNode(), SDNodeOrder);
3999   }
4000
4001   setValue(&I, result);
4002 }
4003
4004 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
4005 /// limited-precision mode with x == 10.0f.
4006 void
4007 SelectionDAGBuilder::visitPow(CallInst &I) {
4008   SDValue result;
4009   Value *Val = I.getOperand(1);
4010   DebugLoc dl = getCurDebugLoc();
4011   bool IsExp10 = false;
4012
4013   if (getValue(Val).getValueType() == MVT::f32 &&
4014       getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
4015       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4016     if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
4017       if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
4018         APFloat Ten(10.0f);
4019         IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
4020       }
4021     }
4022   }
4023
4024   if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4025     SDValue Op = getValue(I.getOperand(2));
4026
4027     // Put the exponent in the right bit position for later addition to the
4028     // final result:
4029     //
4030     //   #define LOG2OF10 3.3219281f
4031     //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
4032     SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4033                              getF32Constant(DAG, 0x40549a78));
4034     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4035
4036     //   FractionalPartOfX = x - (float)IntegerPartOfX;
4037     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4038     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4039
4040     if (DisableScheduling) {
4041       DAG.AssignOrdering(t0.getNode(), SDNodeOrder);
4042       DAG.AssignOrdering(t1.getNode(), SDNodeOrder);
4043       DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder);
4044       DAG.AssignOrdering(X.getNode(), SDNodeOrder);
4045     }
4046
4047     //   IntegerPartOfX <<= 23;
4048     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4049                                  DAG.getConstant(23, TLI.getPointerTy()));
4050
4051     if (DisableScheduling)
4052       DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder);
4053
4054     if (LimitFloatPrecision <= 6) {
4055       // For floating-point precision of 6:
4056       //
4057       //   twoToFractionalPartOfX =
4058       //     0.997535578f +
4059       //       (0.735607626f + 0.252464424f * x) * x;
4060       //
4061       // error 0.0144103317, which is 6 bits
4062       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4063                                getF32Constant(DAG, 0x3e814304));
4064       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4065                                getF32Constant(DAG, 0x3f3c50c8));
4066       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4067       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4068                                getF32Constant(DAG, 0x3f7f5e7e));
4069       SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
4070       SDValue TwoToFractionalPartOfX =
4071         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
4072
4073       result = DAG.getNode(ISD::BIT_CONVERT, dl,
4074                            MVT::f32, TwoToFractionalPartOfX);
4075
4076       if (DisableScheduling) {
4077         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
4078         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
4079         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
4080         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
4081         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
4082         DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder);
4083         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
4084       }
4085     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
4086       // For floating-point precision of 12:
4087       //
4088       //   TwoToFractionalPartOfX =
4089       //     0.999892986f +
4090       //       (0.696457318f +
4091       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
4092       //
4093       // error 0.000107046256, which is 13 to 14 bits
4094       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4095                                getF32Constant(DAG, 0x3da235e3));
4096       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4097                                getF32Constant(DAG, 0x3e65b8f3));
4098       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4099       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4100                                getF32Constant(DAG, 0x3f324b07));
4101       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4102       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4103                                getF32Constant(DAG, 0x3f7ff8fd));
4104       SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
4105       SDValue TwoToFractionalPartOfX =
4106         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
4107
4108       result = DAG.getNode(ISD::BIT_CONVERT, dl,
4109                            MVT::f32, TwoToFractionalPartOfX);
4110
4111       if (DisableScheduling) {
4112         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
4113         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
4114         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
4115         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
4116         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
4117         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
4118         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
4119         DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder);
4120         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
4121       }
4122     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
4123       // For floating-point precision of 18:
4124       //
4125       //   TwoToFractionalPartOfX =
4126       //     0.999999982f +
4127       //       (0.693148872f +
4128       //         (0.240227044f +
4129       //           (0.554906021e-1f +
4130       //             (0.961591928e-2f +
4131       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4132       // error 2.47208000*10^(-7), which is better than 18 bits
4133       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4134                                getF32Constant(DAG, 0x3924b03e));
4135       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4136                                getF32Constant(DAG, 0x3ab24b87));
4137       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4138       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4139                                getF32Constant(DAG, 0x3c1d8c17));
4140       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4141       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4142                                getF32Constant(DAG, 0x3d634a1d));
4143       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4144       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4145                                getF32Constant(DAG, 0x3e75fe14));
4146       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4147       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4148                                 getF32Constant(DAG, 0x3f317234));
4149       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4150       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4151                                 getF32Constant(DAG, 0x3f800000));
4152       SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
4153       SDValue TwoToFractionalPartOfX =
4154         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
4155
4156       result = DAG.getNode(ISD::BIT_CONVERT, dl,
4157                            MVT::f32, TwoToFractionalPartOfX);
4158
4159       if (DisableScheduling) {
4160         DAG.AssignOrdering(t2.getNode(), SDNodeOrder);
4161         DAG.AssignOrdering(t3.getNode(), SDNodeOrder);
4162         DAG.AssignOrdering(t4.getNode(), SDNodeOrder);
4163         DAG.AssignOrdering(t5.getNode(), SDNodeOrder);
4164         DAG.AssignOrdering(t6.getNode(), SDNodeOrder);
4165         DAG.AssignOrdering(t7.getNode(), SDNodeOrder);
4166         DAG.AssignOrdering(t8.getNode(), SDNodeOrder);
4167         DAG.AssignOrdering(t9.getNode(), SDNodeOrder);
4168         DAG.AssignOrdering(t10.getNode(), SDNodeOrder);
4169         DAG.AssignOrdering(t11.getNode(), SDNodeOrder);
4170         DAG.AssignOrdering(t12.getNode(), SDNodeOrder);
4171         DAG.AssignOrdering(t13.getNode(), SDNodeOrder);
4172         DAG.AssignOrdering(t14.getNode(), SDNodeOrder);
4173         DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder);
4174         DAG.AssignOrdering(result.getNode(), SDNodeOrder);
4175       }
4176     }
4177   } else {
4178     // No special expansion.
4179     result = DAG.getNode(ISD::FPOW, dl,
4180                          getValue(I.getOperand(1)).getValueType(),
4181                          getValue(I.getOperand(1)),
4182                          getValue(I.getOperand(2)));
4183
4184     if (DisableScheduling)
4185       DAG.AssignOrdering(result.getNode(), SDNodeOrder);
4186   }
4187
4188   setValue(&I, result);
4189 }
4190
4191 /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
4192 /// we want to emit this as a call to a named external function, return the name
4193 /// otherwise lower it and return null.
4194 const char *
4195 SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
4196   DebugLoc dl = getCurDebugLoc();
4197   SDValue Res;
4198
4199   switch (Intrinsic) {
4200   default:
4201     // By default, turn this into a target intrinsic node.
4202     visitTargetIntrinsic(I, Intrinsic);
4203     return 0;
4204   case Intrinsic::vastart:  visitVAStart(I); return 0;
4205   case Intrinsic::vaend:    visitVAEnd(I); return 0;
4206   case Intrinsic::vacopy:   visitVACopy(I); return 0;
4207   case Intrinsic::returnaddress:
4208     Res = DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
4209                       getValue(I.getOperand(1)));
4210     setValue(&I, Res);
4211     if (DisableScheduling)
4212       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4213     return 0;
4214   case Intrinsic::frameaddress:
4215     Res = DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
4216                       getValue(I.getOperand(1)));
4217     setValue(&I, Res);
4218     if (DisableScheduling)
4219       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4220     return 0;
4221   case Intrinsic::setjmp:
4222     return "_setjmp"+!TLI.usesUnderscoreSetJmp();
4223   case Intrinsic::longjmp:
4224     return "_longjmp"+!TLI.usesUnderscoreLongJmp();
4225   case Intrinsic::memcpy: {
4226     SDValue Op1 = getValue(I.getOperand(1));
4227     SDValue Op2 = getValue(I.getOperand(2));
4228     SDValue Op3 = getValue(I.getOperand(3));
4229     unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
4230     Res = DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
4231                         I.getOperand(1), 0, I.getOperand(2), 0);
4232     DAG.setRoot(Res);
4233     if (DisableScheduling)
4234       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4235     return 0;
4236   }
4237   case Intrinsic::memset: {
4238     SDValue Op1 = getValue(I.getOperand(1));
4239     SDValue Op2 = getValue(I.getOperand(2));
4240     SDValue Op3 = getValue(I.getOperand(3));
4241     unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
4242     Res = DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
4243                         I.getOperand(1), 0);
4244     DAG.setRoot(Res);
4245     if (DisableScheduling)
4246       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4247     return 0;
4248   }
4249   case Intrinsic::memmove: {
4250     SDValue Op1 = getValue(I.getOperand(1));
4251     SDValue Op2 = getValue(I.getOperand(2));
4252     SDValue Op3 = getValue(I.getOperand(3));
4253     unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
4254
4255     // If the source and destination are known to not be aliases, we can
4256     // lower memmove as memcpy.
4257     uint64_t Size = -1ULL;
4258     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
4259       Size = C->getZExtValue();
4260     if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
4261         AliasAnalysis::NoAlias) {
4262       Res = DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
4263                           I.getOperand(1), 0, I.getOperand(2), 0);
4264       DAG.setRoot(Res);
4265       if (DisableScheduling)
4266         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4267       return 0;
4268     }
4269
4270     Res = DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
4271                          I.getOperand(1), 0, I.getOperand(2), 0);
4272     DAG.setRoot(Res);
4273     if (DisableScheduling)
4274       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4275     return 0;
4276   }
4277   case Intrinsic::dbg_stoppoint:
4278   case Intrinsic::dbg_region_start:
4279   case Intrinsic::dbg_region_end:
4280   case Intrinsic::dbg_func_start:
4281     // FIXME - Remove this instructions once the dust settles.
4282     return 0;
4283   case Intrinsic::dbg_declare: {
4284     if (OptLevel != CodeGenOpt::None)
4285       // FIXME: Variable debug info is not supported here.
4286       return 0;
4287     DwarfWriter *DW = DAG.getDwarfWriter();
4288     if (!DW)
4289       return 0;
4290     DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
4291     if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None))
4292       return 0;
4293
4294     MDNode *Variable = DI.getVariable();
4295     Value *Address = DI.getAddress();
4296     if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
4297       Address = BCI->getOperand(0);
4298     AllocaInst *AI = dyn_cast<AllocaInst>(Address);
4299     // Don't handle byval struct arguments or VLAs, for example.
4300     if (!AI)
4301       return 0;
4302     DenseMap<const AllocaInst*, int>::iterator SI =
4303       FuncInfo.StaticAllocaMap.find(AI);
4304     if (SI == FuncInfo.StaticAllocaMap.end())
4305       return 0; // VLAs.
4306     int FI = SI->second;
4307
4308     MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
4309     if (MMI) {
4310       MetadataContext &TheMetadata =
4311         DI.getParent()->getContext().getMetadata();
4312       unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
4313       MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
4314       MMI->setVariableDbgInfo(Variable, FI, Dbg);
4315     }
4316     return 0;
4317   }
4318   case Intrinsic::eh_exception: {
4319     // Insert the EXCEPTIONADDR instruction.
4320     assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
4321     SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
4322     SDValue Ops[1];
4323     Ops[0] = DAG.getRoot();
4324     SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
4325     setValue(&I, Op);
4326     DAG.setRoot(Op.getValue(1));
4327     if (DisableScheduling)
4328       DAG.AssignOrdering(Op.getNode(), SDNodeOrder);
4329     return 0;
4330   }
4331
4332   case Intrinsic::eh_selector: {
4333     MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
4334
4335     if (CurMBB->isLandingPad())
4336       AddCatchInfo(I, MMI, CurMBB);
4337     else {
4338 #ifndef NDEBUG
4339       FuncInfo.CatchInfoLost.insert(&I);
4340 #endif
4341       // FIXME: Mark exception selector register as live in.  Hack for PR1508.
4342       unsigned Reg = TLI.getExceptionSelectorRegister();
4343       if (Reg) CurMBB->addLiveIn(Reg);
4344     }
4345
4346     // Insert the EHSELECTION instruction.
4347     SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
4348     SDValue Ops[2];
4349     Ops[0] = getValue(I.getOperand(1));
4350     Ops[1] = getRoot();
4351     SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
4352
4353     DAG.setRoot(Op.getValue(1));
4354
4355     Res = DAG.getSExtOrTrunc(Op, dl, MVT::i32);
4356     setValue(&I, Res);
4357     if (DisableScheduling) {
4358       DAG.AssignOrdering(Op.getNode(), SDNodeOrder);
4359       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4360     }
4361     return 0;
4362   }
4363
4364   case Intrinsic::eh_typeid_for: {
4365     MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
4366
4367     if (MMI) {
4368       // Find the type id for the given typeinfo.
4369       GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
4370       unsigned TypeID = MMI->getTypeIDFor(GV);
4371       Res = DAG.getConstant(TypeID, MVT::i32);
4372     } else {
4373       // Return something different to eh_selector.
4374       Res = DAG.getConstant(1, MVT::i32);
4375     }
4376
4377     setValue(&I, Res);
4378     if (DisableScheduling)
4379       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4380     return 0;
4381   }
4382
4383   case Intrinsic::eh_return_i32:
4384   case Intrinsic::eh_return_i64:
4385     if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
4386       MMI->setCallsEHReturn(true);
4387       Res = DAG.getNode(ISD::EH_RETURN, dl,
4388                         MVT::Other,
4389                         getControlRoot(),
4390                         getValue(I.getOperand(1)),
4391                         getValue(I.getOperand(2)));
4392       DAG.setRoot(Res);
4393       if (DisableScheduling)
4394         DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4395     } else {
4396       setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
4397     }
4398
4399     return 0;
4400   case Intrinsic::eh_unwind_init:
4401     if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
4402       MMI->setCallsUnwindInit(true);
4403     }
4404     return 0;
4405   case Intrinsic::eh_dwarf_cfa: {
4406     EVT VT = getValue(I.getOperand(1)).getValueType();
4407     SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
4408                                         TLI.getPointerTy());
4409     SDValue Offset = DAG.getNode(ISD::ADD, dl,
4410                                  TLI.getPointerTy(),
4411                                  DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
4412                                              TLI.getPointerTy()),
4413                                  CfaArg);
4414     SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
4415                              TLI.getPointerTy(),
4416                              DAG.getConstant(0, TLI.getPointerTy()));
4417     Res = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
4418                       FA, Offset);
4419     setValue(&I, Res);
4420     if (DisableScheduling) {
4421       DAG.AssignOrdering(CfaArg.getNode(), SDNodeOrder);
4422       DAG.AssignOrdering(Offset.getNode(), SDNodeOrder);
4423       DAG.AssignOrdering(FA.getNode(), SDNodeOrder);
4424       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4425     }
4426     return 0;
4427   }
4428   case Intrinsic::convertff:
4429   case Intrinsic::convertfsi:
4430   case Intrinsic::convertfui:
4431   case Intrinsic::convertsif:
4432   case Intrinsic::convertuif:
4433   case Intrinsic::convertss:
4434   case Intrinsic::convertsu:
4435   case Intrinsic::convertus:
4436   case Intrinsic::convertuu: {
4437     ISD::CvtCode Code = ISD::CVT_INVALID;
4438     switch (Intrinsic) {
4439     case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
4440     case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
4441     case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
4442     case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
4443     case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
4444     case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
4445     case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
4446     case Intrinsic::convertus:  Code = ISD::CVT_US; break;
4447     case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
4448     }
4449     EVT DestVT = TLI.getValueType(I.getType());
4450     Value *Op1 = I.getOperand(1);
4451     Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
4452                                DAG.getValueType(DestVT),
4453                                DAG.getValueType(getValue(Op1).getValueType()),
4454                                getValue(I.getOperand(2)),
4455                                getValue(I.getOperand(3)),
4456                                Code);
4457     setValue(&I, Res);
4458     if (DisableScheduling)
4459       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4460     return 0;
4461   }
4462   case Intrinsic::sqrt:
4463     Res = DAG.getNode(ISD::FSQRT, dl,
4464                       getValue(I.getOperand(1)).getValueType(),
4465                       getValue(I.getOperand(1)));
4466     setValue(&I, Res);
4467     if (DisableScheduling)
4468       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4469     return 0;
4470   case Intrinsic::powi:
4471     Res = DAG.getNode(ISD::FPOWI, dl,
4472                       getValue(I.getOperand(1)).getValueType(),
4473                       getValue(I.getOperand(1)),
4474                       getValue(I.getOperand(2)));
4475     setValue(&I, Res);
4476     if (DisableScheduling)
4477       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4478     return 0;
4479   case Intrinsic::sin:
4480     Res = DAG.getNode(ISD::FSIN, dl,
4481                       getValue(I.getOperand(1)).getValueType(),
4482                       getValue(I.getOperand(1)));
4483     setValue(&I, Res);
4484     if (DisableScheduling)
4485       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4486     return 0;
4487   case Intrinsic::cos:
4488     Res = DAG.getNode(ISD::FCOS, dl,
4489                       getValue(I.getOperand(1)).getValueType(),
4490                       getValue(I.getOperand(1)));
4491     setValue(&I, Res);
4492     if (DisableScheduling)
4493       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4494     return 0;
4495   case Intrinsic::log:
4496     visitLog(I);
4497     return 0;
4498   case Intrinsic::log2:
4499     visitLog2(I);
4500     return 0;
4501   case Intrinsic::log10:
4502     visitLog10(I);
4503     return 0;
4504   case Intrinsic::exp:
4505     visitExp(I);
4506     return 0;
4507   case Intrinsic::exp2:
4508     visitExp2(I);
4509     return 0;
4510   case Intrinsic::pow:
4511     visitPow(I);
4512     return 0;
4513   case Intrinsic::pcmarker: {
4514     SDValue Tmp = getValue(I.getOperand(1));
4515     Res = DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp);
4516     DAG.setRoot(Res);
4517     if (DisableScheduling)
4518       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4519     return 0;
4520   }
4521   case Intrinsic::readcyclecounter: {
4522     SDValue Op = getRoot();
4523     Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
4524                       DAG.getVTList(MVT::i64, MVT::Other),
4525                       &Op, 1);
4526     setValue(&I, Res);
4527     DAG.setRoot(Res.getValue(1));
4528     if (DisableScheduling)
4529       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4530     return 0;
4531   }
4532   case Intrinsic::bswap:
4533     Res = DAG.getNode(ISD::BSWAP, dl,
4534                       getValue(I.getOperand(1)).getValueType(),
4535                       getValue(I.getOperand(1)));
4536     setValue(&I, Res);
4537     if (DisableScheduling)
4538       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4539     return 0;
4540   case Intrinsic::cttz: {
4541     SDValue Arg = getValue(I.getOperand(1));
4542     EVT Ty = Arg.getValueType();
4543     Res = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
4544     setValue(&I, Res);
4545     if (DisableScheduling)
4546       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4547     return 0;
4548   }
4549   case Intrinsic::ctlz: {
4550     SDValue Arg = getValue(I.getOperand(1));
4551     EVT Ty = Arg.getValueType();
4552     Res = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
4553     setValue(&I, Res);
4554     if (DisableScheduling)
4555       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4556     return 0;
4557   }
4558   case Intrinsic::ctpop: {
4559     SDValue Arg = getValue(I.getOperand(1));
4560     EVT Ty = Arg.getValueType();
4561     Res = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
4562     setValue(&I, Res);
4563     if (DisableScheduling)
4564       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4565     return 0;
4566   }
4567   case Intrinsic::stacksave: {
4568     SDValue Op = getRoot();
4569     Res = DAG.getNode(ISD::STACKSAVE, dl,
4570                       DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
4571     setValue(&I, Res);
4572     DAG.setRoot(Res.getValue(1));
4573     if (DisableScheduling)
4574       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4575     return 0;
4576   }
4577   case Intrinsic::stackrestore: {
4578     Res = getValue(I.getOperand(1));
4579     Res = DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res);
4580     DAG.setRoot(Res);
4581     if (DisableScheduling)
4582       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4583     return 0;
4584   }
4585   case Intrinsic::stackprotector: {
4586     // Emit code into the DAG to store the stack guard onto the stack.
4587     MachineFunction &MF = DAG.getMachineFunction();
4588     MachineFrameInfo *MFI = MF.getFrameInfo();
4589     EVT PtrTy = TLI.getPointerTy();
4590
4591     SDValue Src = getValue(I.getOperand(1));   // The guard's value.
4592     AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
4593
4594     int FI = FuncInfo.StaticAllocaMap[Slot];
4595     MFI->setStackProtectorIndex(FI);
4596
4597     SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
4598
4599     // Store the stack protector onto the stack.
4600     Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
4601                        PseudoSourceValue::getFixedStack(FI),
4602                        0, true);
4603     setValue(&I, Res);
4604     DAG.setRoot(Res);
4605     if (DisableScheduling)
4606       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4607     return 0;
4608   }
4609   case Intrinsic::objectsize: {
4610     // If we don't know by now, we're never going to know.
4611     ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
4612
4613     assert(CI && "Non-constant type in __builtin_object_size?");
4614
4615     SDValue Arg = getValue(I.getOperand(0));
4616     EVT Ty = Arg.getValueType();
4617
4618     if (CI->getZExtValue() < 2)
4619       Res = DAG.getConstant(-1ULL, Ty);
4620     else
4621       Res = DAG.getConstant(0, Ty);
4622
4623     setValue(&I, Res);
4624     if (DisableScheduling)
4625       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4626     return 0;
4627   }
4628   case Intrinsic::var_annotation:
4629     // Discard annotate attributes
4630     return 0;
4631
4632   case Intrinsic::init_trampoline: {
4633     const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
4634
4635     SDValue Ops[6];
4636     Ops[0] = getRoot();
4637     Ops[1] = getValue(I.getOperand(1));
4638     Ops[2] = getValue(I.getOperand(2));
4639     Ops[3] = getValue(I.getOperand(3));
4640     Ops[4] = DAG.getSrcValue(I.getOperand(1));
4641     Ops[5] = DAG.getSrcValue(F);
4642
4643     Res = DAG.getNode(ISD::TRAMPOLINE, dl,
4644                       DAG.getVTList(TLI.getPointerTy(), MVT::Other),
4645                       Ops, 6);
4646
4647     setValue(&I, Res);
4648     DAG.setRoot(Res.getValue(1));
4649     if (DisableScheduling)
4650       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4651     return 0;
4652   }
4653   case Intrinsic::gcroot:
4654     if (GFI) {
4655       Value *Alloca = I.getOperand(1);
4656       Constant *TypeMap = cast<Constant>(I.getOperand(2));
4657
4658       FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
4659       GFI->addStackRoot(FI->getIndex(), TypeMap);
4660     }
4661     return 0;
4662   case Intrinsic::gcread:
4663   case Intrinsic::gcwrite:
4664     llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
4665     return 0;
4666   case Intrinsic::flt_rounds:
4667     Res = DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32);
4668     setValue(&I, Res);
4669     if (DisableScheduling)
4670       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4671     return 0;
4672   case Intrinsic::trap:
4673     Res = DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot());
4674     DAG.setRoot(Res);
4675     if (DisableScheduling)
4676       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4677     return 0;
4678   case Intrinsic::uadd_with_overflow:
4679     return implVisitAluOverflow(I, ISD::UADDO);
4680   case Intrinsic::sadd_with_overflow:
4681     return implVisitAluOverflow(I, ISD::SADDO);
4682   case Intrinsic::usub_with_overflow:
4683     return implVisitAluOverflow(I, ISD::USUBO);
4684   case Intrinsic::ssub_with_overflow:
4685     return implVisitAluOverflow(I, ISD::SSUBO);
4686   case Intrinsic::umul_with_overflow:
4687     return implVisitAluOverflow(I, ISD::UMULO);
4688   case Intrinsic::smul_with_overflow:
4689     return implVisitAluOverflow(I, ISD::SMULO);
4690
4691   case Intrinsic::prefetch: {
4692     SDValue Ops[4];
4693     Ops[0] = getRoot();
4694     Ops[1] = getValue(I.getOperand(1));
4695     Ops[2] = getValue(I.getOperand(2));
4696     Ops[3] = getValue(I.getOperand(3));
4697     Res = DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4);
4698     DAG.setRoot(Res);
4699     if (DisableScheduling)
4700       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4701     return 0;
4702   }
4703
4704   case Intrinsic::memory_barrier: {
4705     SDValue Ops[6];
4706     Ops[0] = getRoot();
4707     for (int x = 1; x < 6; ++x)
4708       Ops[x] = getValue(I.getOperand(x));
4709
4710     Res = DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6);
4711     DAG.setRoot(Res);
4712     if (DisableScheduling)
4713       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4714     return 0;
4715   }
4716   case Intrinsic::atomic_cmp_swap: {
4717     SDValue Root = getRoot();
4718     SDValue L =
4719       DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
4720                     getValue(I.getOperand(2)).getValueType().getSimpleVT(),
4721                     Root,
4722                     getValue(I.getOperand(1)),
4723                     getValue(I.getOperand(2)),
4724                     getValue(I.getOperand(3)),
4725                     I.getOperand(1));
4726     setValue(&I, L);
4727     DAG.setRoot(L.getValue(1));
4728     if (DisableScheduling)
4729       DAG.AssignOrdering(L.getNode(), SDNodeOrder);
4730     return 0;
4731   }
4732   case Intrinsic::atomic_load_add:
4733     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
4734   case Intrinsic::atomic_load_sub:
4735     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
4736   case Intrinsic::atomic_load_or:
4737     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
4738   case Intrinsic::atomic_load_xor:
4739     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
4740   case Intrinsic::atomic_load_and:
4741     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
4742   case Intrinsic::atomic_load_nand:
4743     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
4744   case Intrinsic::atomic_load_max:
4745     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
4746   case Intrinsic::atomic_load_min:
4747     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
4748   case Intrinsic::atomic_load_umin:
4749     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
4750   case Intrinsic::atomic_load_umax:
4751     return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
4752   case Intrinsic::atomic_swap:
4753     return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
4754
4755   case Intrinsic::invariant_start:
4756   case Intrinsic::lifetime_start:
4757     // Discard region information.
4758     Res = DAG.getUNDEF(TLI.getPointerTy());
4759     setValue(&I, Res);
4760     if (DisableScheduling)
4761       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
4762     return 0;
4763   case Intrinsic::invariant_end:
4764   case Intrinsic::lifetime_end:
4765     // Discard region information.
4766     return 0;
4767   }
4768 }
4769
4770 /// Test if the given instruction is in a position to be optimized
4771 /// with a tail-call. This roughly means that it's in a block with
4772 /// a return and there's nothing that needs to be scheduled
4773 /// between it and the return.
4774 ///
4775 /// This function only tests target-independent requirements.
4776 /// For target-dependent requirements, a target should override
4777 /// TargetLowering::IsEligibleForTailCallOptimization.
4778 ///
4779 static bool
4780 isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
4781                      const TargetLowering &TLI) {
4782   const BasicBlock *ExitBB = I->getParent();
4783   const TerminatorInst *Term = ExitBB->getTerminator();
4784   const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
4785   const Function *F = ExitBB->getParent();
4786
4787   // The block must end in a return statement or an unreachable.
4788   if (!Ret && !isa<UnreachableInst>(Term)) return false;
4789
4790   // If I will have a chain, make sure no other instruction that will have a
4791   // chain interposes between I and the return.
4792   if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
4793       !I->isSafeToSpeculativelyExecute())
4794     for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
4795          --BBI) {
4796       if (&*BBI == I)
4797         break;
4798       if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
4799           !BBI->isSafeToSpeculativelyExecute())
4800         return false;
4801     }
4802
4803   // If the block ends with a void return or unreachable, it doesn't matter
4804   // what the call's return type is.
4805   if (!Ret || Ret->getNumOperands() == 0) return true;
4806
4807   // If the return value is undef, it doesn't matter what the call's
4808   // return type is.
4809   if (isa<UndefValue>(Ret->getOperand(0))) return true;
4810
4811   // Conservatively require the attributes of the call to match those of
4812   // the return. Ignore noalias because it doesn't affect the call sequence.
4813   unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
4814   if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
4815     return false;
4816
4817   // Otherwise, make sure the unmodified return value of I is the return value.
4818   for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
4819        U = dyn_cast<Instruction>(U->getOperand(0))) {
4820     if (!U)
4821       return false;
4822     if (!U->hasOneUse())
4823       return false;
4824     if (U == I)
4825       break;
4826     // Check for a truly no-op truncate.
4827     if (isa<TruncInst>(U) &&
4828         TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
4829       continue;
4830     // Check for a truly no-op bitcast.
4831     if (isa<BitCastInst>(U) &&
4832         (U->getOperand(0)->getType() == U->getType() ||
4833          (isa<PointerType>(U->getOperand(0)->getType()) &&
4834           isa<PointerType>(U->getType()))))
4835       continue;
4836     // Otherwise it's not a true no-op.
4837     return false;
4838   }
4839
4840   return true;
4841 }
4842
4843 void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
4844                                       bool isTailCall,
4845                                       MachineBasicBlock *LandingPad) {
4846   const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
4847   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
4848   const Type *RetTy = FTy->getReturnType();
4849   MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
4850   unsigned BeginLabel = 0, EndLabel = 0;
4851
4852   TargetLowering::ArgListTy Args;
4853   TargetLowering::ArgListEntry Entry;
4854   Args.reserve(CS.arg_size());
4855
4856   // Check whether the function can return without sret-demotion.
4857   SmallVector<EVT, 4> OutVTs;
4858   SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
4859   SmallVector<uint64_t, 4> Offsets;
4860   getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
4861                 OutVTs, OutsFlags, TLI, &Offsets);
4862
4863   bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
4864                         FTy->isVarArg(), OutVTs, OutsFlags, DAG);
4865
4866   SDValue DemoteStackSlot;
4867
4868   if (!CanLowerReturn) {
4869     uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
4870                       FTy->getReturnType());
4871     unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
4872                       FTy->getReturnType());
4873     MachineFunction &MF = DAG.getMachineFunction();
4874     int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
4875     const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
4876
4877     DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
4878     Entry.Node = DemoteStackSlot;
4879     Entry.Ty = StackSlotPtrType;
4880     Entry.isSExt = false;
4881     Entry.isZExt = false;
4882     Entry.isInReg = false;
4883     Entry.isSRet = true;
4884     Entry.isNest = false;
4885     Entry.isByVal = false;
4886     Entry.Alignment = Align;
4887     Args.push_back(Entry);
4888     RetTy = Type::getVoidTy(FTy->getContext());
4889   }
4890
4891   for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
4892        i != e; ++i) {
4893     SDValue ArgNode = getValue(*i);
4894     Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
4895
4896     unsigned attrInd = i - CS.arg_begin() + 1;
4897     Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
4898     Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
4899     Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
4900     Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
4901     Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
4902     Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
4903     Entry.Alignment = CS.getParamAlignment(attrInd);
4904     Args.push_back(Entry);
4905   }
4906
4907   if (LandingPad && MMI) {
4908     // Insert a label before the invoke call to mark the try range.  This can be
4909     // used to detect deletion of the invoke via the MachineModuleInfo.
4910     BeginLabel = MMI->NextLabelID();
4911
4912     // Both PendingLoads and PendingExports must be flushed here;
4913     // this call might not return.
4914     (void)getRoot();
4915     SDValue Label = DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
4916                                  getControlRoot(), BeginLabel);
4917     DAG.setRoot(Label);
4918     if (DisableScheduling)
4919       DAG.AssignOrdering(Label.getNode(), SDNodeOrder);
4920   }
4921
4922   // Check if target-independent constraints permit a tail call here.
4923   // Target-dependent constraints are checked within TLI.LowerCallTo.
4924   if (isTailCall &&
4925       !isInTailCallPosition(CS.getInstruction(),
4926                             CS.getAttributes().getRetAttributes(),
4927                             TLI))
4928     isTailCall = false;
4929
4930   std::pair<SDValue,SDValue> Result =
4931     TLI.LowerCallTo(getRoot(), RetTy,
4932                     CS.paramHasAttr(0, Attribute::SExt),
4933                     CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
4934                     CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
4935                     CS.getCallingConv(),
4936                     isTailCall,
4937                     !CS.getInstruction()->use_empty(),
4938                     Callee, Args, DAG, getCurDebugLoc());
4939   assert((isTailCall || Result.second.getNode()) &&
4940          "Non-null chain expected with non-tail call!");
4941   assert((Result.second.getNode() || !Result.first.getNode()) &&
4942          "Null value expected with tail call!");
4943   if (Result.first.getNode()) {
4944     setValue(CS.getInstruction(), Result.first);
4945     if (DisableScheduling)
4946       DAG.AssignOrdering(Result.first.getNode(), SDNodeOrder);
4947   } else if (!CanLowerReturn && Result.second.getNode()) {
4948     // The instruction result is the result of loading from the
4949     // hidden sret parameter.
4950     SmallVector<EVT, 1> PVTs;
4951     const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
4952
4953     ComputeValueVTs(TLI, PtrRetTy, PVTs);
4954     assert(PVTs.size() == 1 && "Pointers should fit in one register");
4955     EVT PtrVT = PVTs[0];
4956     unsigned NumValues = OutVTs.size();
4957     SmallVector<SDValue, 4> Values(NumValues);
4958     SmallVector<SDValue, 4> Chains(NumValues);
4959
4960     for (unsigned i = 0; i < NumValues; ++i) {
4961       SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
4962                                 DemoteStackSlot,
4963                                 DAG.getConstant(Offsets[i], PtrVT));
4964       SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
4965                               Add, NULL, Offsets[i], false, 1);
4966       Values[i] = L;
4967       Chains[i] = L.getValue(1);
4968
4969       if (DisableScheduling) {
4970         DAG.AssignOrdering(Add.getNode(), SDNodeOrder);
4971         DAG.AssignOrdering(L.getNode(), SDNodeOrder);
4972       }
4973     }
4974
4975     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
4976                                 MVT::Other, &Chains[0], NumValues);
4977     PendingLoads.push_back(Chain);
4978
4979     SDValue MV = DAG.getNode(ISD::MERGE_VALUES,
4980                              getCurDebugLoc(),
4981                              DAG.getVTList(&OutVTs[0], NumValues),
4982                              &Values[0], NumValues);
4983     setValue(CS.getInstruction(), MV);
4984
4985     if (DisableScheduling) {
4986       DAG.AssignOrdering(Chain.getNode(), SDNodeOrder);
4987       DAG.AssignOrdering(MV.getNode(), SDNodeOrder);
4988     }
4989   }
4990
4991   // As a special case, a null chain means that a tail call has been emitted and
4992   // the DAG root is already updated.
4993   if (Result.second.getNode()) {
4994     DAG.setRoot(Result.second);
4995     if (DisableScheduling)
4996       DAG.AssignOrdering(Result.second.getNode(), SDNodeOrder);
4997   } else {
4998     HasTailCall = true;
4999   }
5000
5001   if (LandingPad && MMI) {
5002     // Insert a label at the end of the invoke call to mark the try range.  This
5003     // can be used to detect deletion of the invoke via the MachineModuleInfo.
5004     EndLabel = MMI->NextLabelID();
5005     SDValue Label = DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
5006                                  getRoot(), EndLabel);
5007     DAG.setRoot(Label);
5008
5009     if (DisableScheduling)
5010       DAG.AssignOrdering(Label.getNode(), SDNodeOrder);
5011
5012     // Inform MachineModuleInfo of range.
5013     MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
5014   }
5015 }
5016
5017 void SelectionDAGBuilder::visitCall(CallInst &I) {
5018   const char *RenameFn = 0;
5019   if (Function *F = I.getCalledFunction()) {
5020     if (F->isDeclaration()) {
5021       const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
5022       if (II) {
5023         if (unsigned IID = II->getIntrinsicID(F)) {
5024           RenameFn = visitIntrinsicCall(I, IID);
5025           if (!RenameFn)
5026             return;
5027         }
5028       }
5029       if (unsigned IID = F->getIntrinsicID()) {
5030         RenameFn = visitIntrinsicCall(I, IID);
5031         if (!RenameFn)
5032           return;
5033       }
5034     }
5035
5036     // Check for well-known libc/libm calls.  If the function is internal, it
5037     // can't be a library call.
5038     if (!F->hasLocalLinkage() && F->hasName()) {
5039       StringRef Name = F->getName();
5040       if (Name == "copysign" || Name == "copysignf") {
5041         if (I.getNumOperands() == 3 &&   // Basic sanity checks.
5042             I.getOperand(1)->getType()->isFloatingPoint() &&
5043             I.getType() == I.getOperand(1)->getType() &&
5044             I.getType() == I.getOperand(2)->getType()) {
5045           SDValue LHS = getValue(I.getOperand(1));
5046           SDValue RHS = getValue(I.getOperand(2));
5047           SDValue Res = DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
5048                                     LHS.getValueType(), LHS, RHS);
5049           setValue(&I, Res);
5050           if (DisableScheduling)
5051             DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
5052           return;
5053         }
5054       } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
5055         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
5056             I.getOperand(1)->getType()->isFloatingPoint() &&
5057             I.getType() == I.getOperand(1)->getType()) {
5058           SDValue Tmp = getValue(I.getOperand(1));
5059           SDValue Res = DAG.getNode(ISD::FABS, getCurDebugLoc(),
5060                                     Tmp.getValueType(), Tmp);
5061           setValue(&I, Res);
5062           if (DisableScheduling)
5063             DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
5064           return;
5065         }
5066       } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
5067         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
5068             I.getOperand(1)->getType()->isFloatingPoint() &&
5069             I.getType() == I.getOperand(1)->getType() &&
5070             I.onlyReadsMemory()) {
5071           SDValue Tmp = getValue(I.getOperand(1));
5072           SDValue Res = DAG.getNode(ISD::FSIN, getCurDebugLoc(),
5073                                     Tmp.getValueType(), Tmp);
5074           setValue(&I, Res);
5075           if (DisableScheduling)
5076             DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
5077           return;
5078         }
5079       } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
5080         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
5081             I.getOperand(1)->getType()->isFloatingPoint() &&
5082             I.getType() == I.getOperand(1)->getType() &&
5083             I.onlyReadsMemory()) {
5084           SDValue Tmp = getValue(I.getOperand(1));
5085           SDValue Res = DAG.getNode(ISD::FCOS, getCurDebugLoc(),
5086                                     Tmp.getValueType(), Tmp);
5087           setValue(&I, Res);
5088           if (DisableScheduling)
5089             DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
5090           return;
5091         }
5092       } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
5093         if (I.getNumOperands() == 2 &&   // Basic sanity checks.
5094             I.getOperand(1)->getType()->isFloatingPoint() &&
5095             I.getType() == I.getOperand(1)->getType() &&
5096             I.onlyReadsMemory()) {
5097           SDValue Tmp = getValue(I.getOperand(1));
5098           SDValue Res = DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
5099                                     Tmp.getValueType(), Tmp);
5100           setValue(&I, Res);
5101           if (DisableScheduling)
5102             DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
5103           return;
5104         }
5105       }
5106     }
5107   } else if (isa<InlineAsm>(I.getOperand(0))) {
5108     visitInlineAsm(&I);
5109     return;
5110   }
5111
5112   SDValue Callee;
5113   if (!RenameFn)
5114     Callee = getValue(I.getOperand(0));
5115   else
5116     Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
5117
5118   if (DisableScheduling)
5119     DAG.AssignOrdering(Callee.getNode(), SDNodeOrder);
5120
5121   // Check if we can potentially perform a tail call. More detailed
5122   // checking is be done within LowerCallTo, after more information
5123   // about the call is known.
5124   bool isTailCall = PerformTailCallOpt && I.isTailCall();
5125
5126   LowerCallTo(&I, Callee, isTailCall);
5127 }
5128
5129 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
5130 /// this value and returns the result as a ValueVT value.  This uses
5131 /// Chain/Flag as the input and updates them for the output Chain/Flag.
5132 /// If the Flag pointer is NULL, no flag is used.
5133 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
5134                                       unsigned Order, SDValue &Chain,
5135                                       SDValue *Flag) const {
5136   // Assemble the legal parts into the final values.
5137   SmallVector<SDValue, 4> Values(ValueVTs.size());
5138   SmallVector<SDValue, 8> Parts;
5139   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
5140     // Copy the legal parts from the registers.
5141     EVT ValueVT = ValueVTs[Value];
5142     unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
5143     EVT RegisterVT = RegVTs[Value];
5144
5145     Parts.resize(NumRegs);
5146     for (unsigned i = 0; i != NumRegs; ++i) {
5147       SDValue P;
5148       if (Flag == 0) {
5149         P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
5150       } else {
5151         P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
5152         *Flag = P.getValue(2);
5153       }
5154
5155       Chain = P.getValue(1);
5156
5157       if (DisableScheduling)
5158         DAG.AssignOrdering(P.getNode(), Order);
5159
5160       // If the source register was virtual and if we know something about it,
5161       // add an assert node.
5162       if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
5163           RegisterVT.isInteger() && !RegisterVT.isVector()) {
5164         unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
5165         FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
5166         if (FLI.LiveOutRegInfo.size() > SlotNo) {
5167           FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
5168
5169           unsigned RegSize = RegisterVT.getSizeInBits();
5170           unsigned NumSignBits = LOI.NumSignBits;
5171           unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
5172
5173           // FIXME: We capture more information than the dag can represent.  For
5174           // now, just use the tightest assertzext/assertsext possible.
5175           bool isSExt = true;
5176           EVT FromVT(MVT::Other);
5177           if (NumSignBits == RegSize)
5178             isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
5179           else if (NumZeroBits >= RegSize-1)
5180             isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
5181           else if (NumSignBits > RegSize-8)
5182             isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
5183           else if (NumZeroBits >= RegSize-8)
5184             isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
5185           else if (NumSignBits > RegSize-16)
5186             isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
5187           else if (NumZeroBits >= RegSize-16)
5188             isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
5189           else if (NumSignBits > RegSize-32)
5190             isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
5191           else if (NumZeroBits >= RegSize-32)
5192             isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
5193
5194           if (FromVT != MVT::Other) {
5195             P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
5196                             RegisterVT, P, DAG.getValueType(FromVT));
5197
5198             if (DisableScheduling)
5199               DAG.AssignOrdering(P.getNode(), Order);
5200           }
5201         }
5202       }
5203
5204       Parts[i] = P;
5205     }
5206
5207     Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
5208                                      NumRegs, RegisterVT, ValueVT);
5209     if (DisableScheduling)
5210       DAG.AssignOrdering(Values[Value].getNode(), Order);
5211     Part += NumRegs;
5212     Parts.clear();
5213   }
5214
5215   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
5216                             DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
5217                             &Values[0], ValueVTs.size());
5218   if (DisableScheduling)
5219     DAG.AssignOrdering(Res.getNode(), Order);
5220   return Res;
5221 }
5222
5223 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
5224 /// specified value into the registers specified by this object.  This uses
5225 /// Chain/Flag as the input and updates them for the output Chain/Flag.
5226 /// If the Flag pointer is NULL, no flag is used.
5227 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
5228                                  unsigned Order, SDValue &Chain,
5229                                  SDValue *Flag) const {
5230   // Get the list of the values's legal parts.
5231   unsigned NumRegs = Regs.size();
5232   SmallVector<SDValue, 8> Parts(NumRegs);
5233   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
5234     EVT ValueVT = ValueVTs[Value];
5235     unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
5236     EVT RegisterVT = RegVTs[Value];
5237
5238     getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
5239                    &Parts[Part], NumParts, RegisterVT);
5240     Part += NumParts;
5241   }
5242
5243   // Copy the parts into the registers.
5244   SmallVector<SDValue, 8> Chains(NumRegs);
5245   for (unsigned i = 0; i != NumRegs; ++i) {
5246     SDValue Part;
5247     if (Flag == 0) {
5248       Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
5249     } else {
5250       Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
5251       *Flag = Part.getValue(1);
5252     }
5253
5254     Chains[i] = Part.getValue(0);
5255
5256     if (DisableScheduling)
5257       DAG.AssignOrdering(Part.getNode(), Order);
5258   }
5259
5260   if (NumRegs == 1 || Flag)
5261     // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
5262     // flagged to it. That is the CopyToReg nodes and the user are considered
5263     // a single scheduling unit. If we create a TokenFactor and return it as
5264     // chain, then the TokenFactor is both a predecessor (operand) of the
5265     // user as well as a successor (the TF operands are flagged to the user).
5266     // c1, f1 = CopyToReg
5267     // c2, f2 = CopyToReg
5268     // c3     = TokenFactor c1, c2
5269     // ...
5270     //        = op c3, ..., f2
5271     Chain = Chains[NumRegs-1];
5272   else
5273     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
5274
5275   if (DisableScheduling)
5276     DAG.AssignOrdering(Chain.getNode(), Order);
5277 }
5278
5279 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
5280 /// operand list.  This adds the code marker and includes the number of
5281 /// values added into it.
5282 void RegsForValue::AddInlineAsmOperands(unsigned Code,
5283                                         bool HasMatching,unsigned MatchingIdx,
5284                                         SelectionDAG &DAG,
5285                                         std::vector<SDValue> &Ops) const {
5286   EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
5287   assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
5288   unsigned Flag = Code | (Regs.size() << 3);
5289   if (HasMatching)
5290     Flag |= 0x80000000 | (MatchingIdx << 16);
5291   Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
5292   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
5293     unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
5294     EVT RegisterVT = RegVTs[Value];
5295     for (unsigned i = 0; i != NumRegs; ++i) {
5296       assert(Reg < Regs.size() && "Mismatch in # registers expected");
5297       Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
5298     }
5299   }
5300 }
5301
5302 /// isAllocatableRegister - If the specified register is safe to allocate,
5303 /// i.e. it isn't a stack pointer or some other special register, return the
5304 /// register class for the register.  Otherwise, return null.
5305 static const TargetRegisterClass *
5306 isAllocatableRegister(unsigned Reg, MachineFunction &MF,
5307                       const TargetLowering &TLI,
5308                       const TargetRegisterInfo *TRI) {
5309   EVT FoundVT = MVT::Other;
5310   const TargetRegisterClass *FoundRC = 0;
5311   for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
5312        E = TRI->regclass_end(); RCI != E; ++RCI) {
5313     EVT ThisVT = MVT::Other;
5314
5315     const TargetRegisterClass *RC = *RCI;
5316     // If none of the the value types for this register class are valid, we
5317     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5318     for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
5319          I != E; ++I) {
5320       if (TLI.isTypeLegal(*I)) {
5321         // If we have already found this register in a different register class,
5322         // choose the one with the largest VT specified.  For example, on
5323         // PowerPC, we favor f64 register classes over f32.
5324         if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
5325           ThisVT = *I;
5326           break;
5327         }
5328       }
5329     }
5330
5331     if (ThisVT == MVT::Other) continue;
5332
5333     // NOTE: This isn't ideal.  In particular, this might allocate the
5334     // frame pointer in functions that need it (due to them not being taken
5335     // out of allocation, because a variable sized allocation hasn't been seen
5336     // yet).  This is a slight code pessimization, but should still work.
5337     for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
5338          E = RC->allocation_order_end(MF); I != E; ++I)
5339       if (*I == Reg) {
5340         // We found a matching register class.  Keep looking at others in case
5341         // we find one with larger registers that this physreg is also in.
5342         FoundRC = RC;
5343         FoundVT = ThisVT;
5344         break;
5345       }
5346   }
5347   return FoundRC;
5348 }
5349
5350
5351 namespace llvm {
5352 /// AsmOperandInfo - This contains information for each constraint that we are
5353 /// lowering.
5354 class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
5355     public TargetLowering::AsmOperandInfo {
5356 public:
5357   /// CallOperand - If this is the result output operand or a clobber
5358   /// this is null, otherwise it is the incoming operand to the CallInst.
5359   /// This gets modified as the asm is processed.
5360   SDValue CallOperand;
5361
5362   /// AssignedRegs - If this is a register or register class operand, this
5363   /// contains the set of register corresponding to the operand.
5364   RegsForValue AssignedRegs;
5365
5366   explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
5367     : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
5368   }
5369
5370   /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
5371   /// busy in OutputRegs/InputRegs.
5372   void MarkAllocatedRegs(bool isOutReg, bool isInReg,
5373                          std::set<unsigned> &OutputRegs,
5374                          std::set<unsigned> &InputRegs,
5375                          const TargetRegisterInfo &TRI) const {
5376     if (isOutReg) {
5377       for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
5378         MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
5379     }
5380     if (isInReg) {
5381       for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
5382         MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
5383     }
5384   }
5385
5386   /// getCallOperandValEVT - Return the EVT of the Value* that this operand
5387   /// corresponds to.  If there is no Value* for this operand, it returns
5388   /// MVT::Other.
5389   EVT getCallOperandValEVT(LLVMContext &Context,
5390                            const TargetLowering &TLI,
5391                            const TargetData *TD) const {
5392     if (CallOperandVal == 0) return MVT::Other;
5393
5394     if (isa<BasicBlock>(CallOperandVal))
5395       return TLI.getPointerTy();
5396
5397     const llvm::Type *OpTy = CallOperandVal->getType();
5398
5399     // If this is an indirect operand, the operand is a pointer to the
5400     // accessed type.
5401     if (isIndirect)
5402       OpTy = cast<PointerType>(OpTy)->getElementType();
5403
5404     // If OpTy is not a single value, it may be a struct/union that we
5405     // can tile with integers.
5406     if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5407       unsigned BitSize = TD->getTypeSizeInBits(OpTy);
5408       switch (BitSize) {
5409       default: break;
5410       case 1:
5411       case 8:
5412       case 16:
5413       case 32:
5414       case 64:
5415       case 128:
5416         OpTy = IntegerType::get(Context, BitSize);
5417         break;
5418       }
5419     }
5420
5421     return TLI.getValueType(OpTy, true);
5422   }
5423
5424 private:
5425   /// MarkRegAndAliases - Mark the specified register and all aliases in the
5426   /// specified set.
5427   static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
5428                                 const TargetRegisterInfo &TRI) {
5429     assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
5430     Regs.insert(Reg);
5431     if (const unsigned *Aliases = TRI.getAliasSet(Reg))
5432       for (; *Aliases; ++Aliases)
5433         Regs.insert(*Aliases);
5434   }
5435 };
5436 } // end llvm namespace.
5437
5438
5439 /// GetRegistersForValue - Assign registers (virtual or physical) for the
5440 /// specified operand.  We prefer to assign virtual registers, to allow the
5441 /// register allocator to handle the assignment process.  However, if the asm
5442 /// uses features that we can't model on machineinstrs, we have SDISel do the
5443 /// allocation.  This produces generally horrible, but correct, code.
5444 ///
5445 ///   OpInfo describes the operand.
5446 ///   Input and OutputRegs are the set of already allocated physical registers.
5447 ///
5448 void SelectionDAGBuilder::
5449 GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
5450                      std::set<unsigned> &OutputRegs,
5451                      std::set<unsigned> &InputRegs) {
5452   LLVMContext &Context = FuncInfo.Fn->getContext();
5453
5454   // Compute whether this value requires an input register, an output register,
5455   // or both.
5456   bool isOutReg = false;
5457   bool isInReg = false;
5458   switch (OpInfo.Type) {
5459   case InlineAsm::isOutput:
5460     isOutReg = true;
5461
5462     // If there is an input constraint that matches this, we need to reserve
5463     // the input register so no other inputs allocate to it.
5464     isInReg = OpInfo.hasMatchingInput();
5465     break;
5466   case InlineAsm::isInput:
5467     isInReg = true;
5468     isOutReg = false;
5469     break;
5470   case InlineAsm::isClobber:
5471     isOutReg = true;
5472     isInReg = true;
5473     break;
5474   }
5475
5476
5477   MachineFunction &MF = DAG.getMachineFunction();
5478   SmallVector<unsigned, 4> Regs;
5479
5480   // If this is a constraint for a single physreg, or a constraint for a
5481   // register class, find it.
5482   std::pair<unsigned, const TargetRegisterClass*> PhysReg =
5483     TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
5484                                      OpInfo.ConstraintVT);
5485
5486   unsigned NumRegs = 1;
5487   if (OpInfo.ConstraintVT != MVT::Other) {
5488     // If this is a FP input in an integer register (or visa versa) insert a bit
5489     // cast of the input value.  More generally, handle any case where the input
5490     // value disagrees with the register class we plan to stick this in.
5491     if (OpInfo.Type == InlineAsm::isInput &&
5492         PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
5493       // Try to convert to the first EVT that the reg class contains.  If the
5494       // types are identical size, use a bitcast to convert (e.g. two differing
5495       // vector types).
5496       EVT RegVT = *PhysReg.second->vt_begin();
5497       if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
5498         OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5499                                          RegVT, OpInfo.CallOperand);
5500         OpInfo.ConstraintVT = RegVT;
5501       } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
5502         // If the input is a FP value and we want it in FP registers, do a
5503         // bitcast to the corresponding integer type.  This turns an f64 value
5504         // into i64, which can be passed with two i32 values on a 32-bit
5505         // machine.
5506         RegVT = EVT::getIntegerVT(Context,
5507                                   OpInfo.ConstraintVT.getSizeInBits());
5508         OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5509                                          RegVT, OpInfo.CallOperand);
5510         OpInfo.ConstraintVT = RegVT;
5511       }
5512     }
5513
5514     NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
5515   }
5516
5517   EVT RegVT;
5518   EVT ValueVT = OpInfo.ConstraintVT;
5519
5520   // If this is a constraint for a specific physical register, like {r17},
5521   // assign it now.
5522   if (unsigned AssignedReg = PhysReg.first) {
5523     const TargetRegisterClass *RC = PhysReg.second;
5524     if (OpInfo.ConstraintVT == MVT::Other)
5525       ValueVT = *RC->vt_begin();
5526
5527     // Get the actual register value type.  This is important, because the user
5528     // may have asked for (e.g.) the AX register in i32 type.  We need to
5529     // remember that AX is actually i16 to get the right extension.
5530     RegVT = *RC->vt_begin();
5531
5532     // This is a explicit reference to a physical register.
5533     Regs.push_back(AssignedReg);
5534
5535     // If this is an expanded reference, add the rest of the regs to Regs.
5536     if (NumRegs != 1) {
5537       TargetRegisterClass::iterator I = RC->begin();
5538       for (; *I != AssignedReg; ++I)
5539         assert(I != RC->end() && "Didn't find reg!");
5540
5541       // Already added the first reg.
5542       --NumRegs; ++I;
5543       for (; NumRegs; --NumRegs, ++I) {
5544         assert(I != RC->end() && "Ran out of registers to allocate!");
5545         Regs.push_back(*I);
5546       }
5547     }
5548     OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
5549     const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
5550     OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
5551     return;
5552   }
5553
5554   // Otherwise, if this was a reference to an LLVM register class, create vregs
5555   // for this reference.
5556   if (const TargetRegisterClass *RC = PhysReg.second) {
5557     RegVT = *RC->vt_begin();
5558     if (OpInfo.ConstraintVT == MVT::Other)
5559       ValueVT = RegVT;
5560
5561     // Create the appropriate number of virtual registers.
5562     MachineRegisterInfo &RegInfo = MF.getRegInfo();
5563     for (; NumRegs; --NumRegs)
5564       Regs.push_back(RegInfo.createVirtualRegister(RC));
5565
5566     OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
5567     return;
5568   }
5569
5570   // This is a reference to a register class that doesn't directly correspond
5571   // to an LLVM register class.  Allocate NumRegs consecutive, available,
5572   // registers from the class.
5573   std::vector<unsigned> RegClassRegs
5574     = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
5575                                             OpInfo.ConstraintVT);
5576
5577   const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
5578   unsigned NumAllocated = 0;
5579   for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
5580     unsigned Reg = RegClassRegs[i];
5581     // See if this register is available.
5582     if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
5583         (isInReg  && InputRegs.count(Reg))) {    // Already used.
5584       // Make sure we find consecutive registers.
5585       NumAllocated = 0;
5586       continue;
5587     }
5588
5589     // Check to see if this register is allocatable (i.e. don't give out the
5590     // stack pointer).
5591     const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
5592     if (!RC) {        // Couldn't allocate this register.
5593       // Reset NumAllocated to make sure we return consecutive registers.
5594       NumAllocated = 0;
5595       continue;
5596     }
5597
5598     // Okay, this register is good, we can use it.
5599     ++NumAllocated;
5600
5601     // If we allocated enough consecutive registers, succeed.
5602     if (NumAllocated == NumRegs) {
5603       unsigned RegStart = (i-NumAllocated)+1;
5604       unsigned RegEnd   = i+1;
5605       // Mark all of the allocated registers used.
5606       for (unsigned i = RegStart; i != RegEnd; ++i)
5607         Regs.push_back(RegClassRegs[i]);
5608
5609       OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
5610                                          OpInfo.ConstraintVT);
5611       OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
5612       return;
5613     }
5614   }
5615
5616   // Otherwise, we couldn't allocate enough registers for this.
5617 }
5618
5619 /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
5620 /// processed uses a memory 'm' constraint.
5621 static bool
5622 hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
5623                           const TargetLowering &TLI) {
5624   for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
5625     InlineAsm::ConstraintInfo &CI = CInfos[i];
5626     for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
5627       TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
5628       if (CType == TargetLowering::C_Memory)
5629         return true;
5630     }
5631
5632     // Indirect operand accesses access memory.
5633     if (CI.isIndirect)
5634       return true;
5635   }
5636
5637   return false;
5638 }
5639
5640 /// visitInlineAsm - Handle a call to an InlineAsm object.
5641 ///
5642 void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
5643   InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
5644
5645   /// ConstraintOperands - Information about all of the constraints.
5646   std::vector<SDISelAsmOperandInfo> ConstraintOperands;
5647
5648   std::set<unsigned> OutputRegs, InputRegs;
5649
5650   // Do a prepass over the constraints, canonicalizing them, and building up the
5651   // ConstraintOperands list.
5652   std::vector<InlineAsm::ConstraintInfo>
5653     ConstraintInfos = IA->ParseConstraints();
5654
5655   bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
5656
5657   SDValue Chain, Flag;
5658
5659   // We won't need to flush pending loads if this asm doesn't touch
5660   // memory and is nonvolatile.
5661   if (hasMemory || IA->hasSideEffects())
5662     Chain = getRoot();
5663   else
5664     Chain = DAG.getRoot();
5665
5666   unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
5667   unsigned ResNo = 0;   // ResNo - The result number of the next output.
5668   for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
5669     ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
5670     SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
5671
5672     EVT OpVT = MVT::Other;
5673
5674     // Compute the value type for each operand.
5675     switch (OpInfo.Type) {
5676     case InlineAsm::isOutput:
5677       // Indirect outputs just consume an argument.
5678       if (OpInfo.isIndirect) {
5679         OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
5680         break;
5681       }
5682
5683       // The return value of the call is this value.  As such, there is no
5684       // corresponding argument.
5685       assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
5686              "Bad inline asm!");
5687       if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
5688         OpVT = TLI.getValueType(STy->getElementType(ResNo));
5689       } else {
5690         assert(ResNo == 0 && "Asm only has one result!");
5691         OpVT = TLI.getValueType(CS.getType());
5692       }
5693       ++ResNo;
5694       break;
5695     case InlineAsm::isInput:
5696       OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
5697       break;
5698     case InlineAsm::isClobber:
5699       // Nothing to do.
5700       break;
5701     }
5702
5703     // If this is an input or an indirect output, process the call argument.
5704     // BasicBlocks are labels, currently appearing only in asm's.
5705     if (OpInfo.CallOperandVal) {
5706       // Strip bitcasts, if any.  This mostly comes up for functions.
5707       OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
5708
5709       if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
5710         OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
5711       } else {
5712         OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
5713       }
5714
5715       OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
5716     }
5717
5718     OpInfo.ConstraintVT = OpVT;
5719   }
5720
5721   // Second pass over the constraints: compute which constraint option to use
5722   // and assign registers to constraints that want a specific physreg.
5723   for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
5724     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5725
5726     // If this is an output operand with a matching input operand, look up the
5727     // matching input. If their types mismatch, e.g. one is an integer, the
5728     // other is floating point, or their sizes are different, flag it as an
5729     // error.
5730     if (OpInfo.hasMatchingInput()) {
5731       SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5732       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5733         if ((OpInfo.ConstraintVT.isInteger() !=
5734              Input.ConstraintVT.isInteger()) ||
5735             (OpInfo.ConstraintVT.getSizeInBits() !=
5736              Input.ConstraintVT.getSizeInBits())) {
5737           llvm_report_error("Unsupported asm: input constraint"
5738                             " with a matching output constraint of incompatible"
5739                             " type!");
5740         }
5741         Input.ConstraintVT = OpInfo.ConstraintVT;
5742       }
5743     }
5744
5745     // Compute the constraint code and ConstraintType to use.
5746     TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
5747
5748     // If this is a memory input, and if the operand is not indirect, do what we
5749     // need to to provide an address for the memory input.
5750     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5751         !OpInfo.isIndirect) {
5752       assert(OpInfo.Type == InlineAsm::isInput &&
5753              "Can only indirectify direct input operands!");
5754
5755       // Memory operands really want the address of the value.  If we don't have
5756       // an indirect input, put it in the constpool if we can, otherwise spill
5757       // it to a stack slot.
5758
5759       // If the operand is a float, integer, or vector constant, spill to a
5760       // constant pool entry to get its address.
5761       Value *OpVal = OpInfo.CallOperandVal;
5762       if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
5763           isa<ConstantVector>(OpVal)) {
5764         OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
5765                                                  TLI.getPointerTy());
5766       } else {
5767         // Otherwise, create a stack slot and emit a store to it before the
5768         // asm.
5769         const Type *Ty = OpVal->getType();
5770         uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
5771         unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
5772         MachineFunction &MF = DAG.getMachineFunction();
5773         int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
5774         SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
5775         Chain = DAG.getStore(Chain, getCurDebugLoc(),
5776                              OpInfo.CallOperand, StackSlot, NULL, 0);
5777         OpInfo.CallOperand = StackSlot;
5778       }
5779
5780       // There is no longer a Value* corresponding to this operand.
5781       OpInfo.CallOperandVal = 0;
5782       // It is now an indirect operand.
5783       OpInfo.isIndirect = true;
5784     }
5785
5786     // If this constraint is for a specific register, allocate it before
5787     // anything else.
5788     if (OpInfo.ConstraintType == TargetLowering::C_Register)
5789       GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5790   }
5791   ConstraintInfos.clear();
5792
5793
  // Third pass - Loop over all of the operands, assigning virtual or physregs
  // to register class operands.
5796   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5797     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5798
5799     // C_Register operands have already been allocated, Other/Memory don't need
5800     // to be.
5801     if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
5802       GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5803   }
5804
5805   // AsmNodeOperands - The operands for the ISD::INLINEASM node.
5806   std::vector<SDValue> AsmNodeOperands;
5807   AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
5808   AsmNodeOperands.push_back(
5809           DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
5810
5811
5812   // Loop over all of the inputs, copying the operand values into the
5813   // appropriate registers and processing the output regs.
5814   RegsForValue RetValRegs;
5815
5816   // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
5817   std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
5818
5819   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5820     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5821
5822     switch (OpInfo.Type) {
5823     case InlineAsm::isOutput: {
5824       if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
5825           OpInfo.ConstraintType != TargetLowering::C_Register) {
5826         // Memory output, or 'other' output (e.g. 'X' constraint).
5827         assert(OpInfo.isIndirect && "Memory output must be indirect operand");
5828
5829         // Add information to the INLINEASM node to know about this output.
5830         unsigned ResOpType = 4/*MEM*/ | (1<<3);
5831         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5832                                                         TLI.getPointerTy()));
5833         AsmNodeOperands.push_back(OpInfo.CallOperand);
5834         break;
5835       }
5836
5837       // Otherwise, this is a register or register class output.
5838
5839       // Copy the output from the appropriate register.  Find a register that
5840       // we can use.
5841       if (OpInfo.AssignedRegs.Regs.empty()) {
5842         llvm_report_error("Couldn't allocate output reg for"
5843                           " constraint '" + OpInfo.ConstraintCode + "'!");
5844       }
5845
5846       // If this is an indirect operand, store through the pointer after the
5847       // asm.
5848       if (OpInfo.isIndirect) {
5849         IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
5850                                                       OpInfo.CallOperandVal));
5851       } else {
5852         // This is the result value of the call.
5853         assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
5854                "Bad inline asm!");
5855         // Concatenate this output onto the outputs list.
5856         RetValRegs.append(OpInfo.AssignedRegs);
5857       }
5858
5859       // Add information to the INLINEASM node to know that this register is
5860       // set.
5861       OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
5862                                                6 /* EARLYCLOBBER REGDEF */ :
5863                                                2 /* REGDEF */ ,
5864                                                false,
5865                                                0,
5866                                                DAG, AsmNodeOperands);
5867       break;
5868     }
5869     case InlineAsm::isInput: {
5870       SDValue InOperandVal = OpInfo.CallOperand;
5871
5872       if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
5873         // If this is required to match an output register we have already set,
5874         // just use its register.
5875         unsigned OperandNo = OpInfo.getMatchedOperand();
5876
5877         // Scan until we find the definition we already emitted of this operand.
5878         // When we find it, create a RegsForValue operand.
5879         unsigned CurOp = 2;  // The first operand.
5880         for (; OperandNo; --OperandNo) {
5881           // Advance to the next operand.
5882           unsigned OpFlag =
5883             cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5884           assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
5885                   (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
5886                   (OpFlag & 7) == 4 /*MEM*/) &&
5887                  "Skipped past definitions?");
5888           CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
5889         }
5890
5891         unsigned OpFlag =
5892           cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5893         if ((OpFlag & 7) == 2 /*REGDEF*/
5894             || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
5895           // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
5896           if (OpInfo.isIndirect) {
5897             llvm_report_error("Don't know how to handle tied indirect "
5898                               "register inputs yet!");
5899           }
5900           RegsForValue MatchedRegs;
5901           MatchedRegs.TLI = &TLI;
5902           MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
5903           EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
5904           MatchedRegs.RegVTs.push_back(RegVT);
5905           MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
5906           for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
5907                i != e; ++i)
5908             MatchedRegs.Regs.
5909               push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
5910
5911           // Use the produced MatchedRegs object to
5912           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5913                                     SDNodeOrder, Chain, &Flag);
5914           MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
5915                                            true, OpInfo.getMatchedOperand(),
5916                                            DAG, AsmNodeOperands);
5917           break;
5918         } else {
5919           assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
5920           assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
5921                  "Unexpected number of operands");
5922           // Add information to the INLINEASM node to know about this input.
5923           // See InlineAsm.h isUseOperandTiedToDef.
5924           OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
5925           AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
5926                                                           TLI.getPointerTy()));
5927           AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
5928           break;
5929         }
5930       }
5931
5932       if (OpInfo.ConstraintType == TargetLowering::C_Other) {
5933         assert(!OpInfo.isIndirect &&
5934                "Don't know how to handle indirect other inputs yet!");
5935
5936         std::vector<SDValue> Ops;
5937         TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
5938                                          hasMemory, Ops, DAG);
5939         if (Ops.empty()) {
5940           llvm_report_error("Invalid operand for inline asm"
5941                             " constraint '" + OpInfo.ConstraintCode + "'!");
5942         }
5943
5944         // Add information to the INLINEASM node to know about this input.
5945         unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
5946         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5947                                                         TLI.getPointerTy()));
5948         AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
5949         break;
5950       } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
5951         assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
5952         assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
5953                "Memory operands expect pointer values");
5954
5955         // Add information to the INLINEASM node to know about this input.
5956         unsigned ResOpType = 4/*MEM*/ | (1<<3);
5957         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5958                                                         TLI.getPointerTy()));
5959         AsmNodeOperands.push_back(InOperandVal);
5960         break;
5961       }
5962
5963       assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
5964               OpInfo.ConstraintType == TargetLowering::C_Register) &&
5965              "Unknown constraint type!");
5966       assert(!OpInfo.isIndirect &&
5967              "Don't know how to handle indirect register inputs yet!");
5968
5969       // Copy the input into the appropriate registers.
5970       if (OpInfo.AssignedRegs.Regs.empty()) {
5971         llvm_report_error("Couldn't allocate input reg for"
5972                           " constraint '"+ OpInfo.ConstraintCode +"'!");
5973       }
5974
5975       OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5976                                         SDNodeOrder, Chain, &Flag);
5977
5978       OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
5979                                                DAG, AsmNodeOperands);
5980       break;
5981     }
5982     case InlineAsm::isClobber: {
5983       // Add the clobbered value to the operand list, so that the register
5984       // allocator is aware that the physreg got clobbered.
5985       if (!OpInfo.AssignedRegs.Regs.empty())
5986         OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
5987                                                  false, 0, DAG,AsmNodeOperands);
5988       break;
5989     }
5990     }
5991   }
5992
5993   // Finish up input operands.
5994   AsmNodeOperands[0] = Chain;
5995   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
5996
5997   Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
5998                       DAG.getVTList(MVT::Other, MVT::Flag),
5999                       &AsmNodeOperands[0], AsmNodeOperands.size());
6000   Flag = Chain.getValue(1);
6001
6002   // If this asm returns a register value, copy the result from that register
6003   // and set it as the value of the call.
6004   if (!RetValRegs.Regs.empty()) {
6005     SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
6006                                              SDNodeOrder, Chain, &Flag);
6007
6008     // FIXME: Why don't we do this for inline asms with MRVs?
6009     if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
6010       EVT ResultType = TLI.getValueType(CS.getType());
6011
6012       // If any of the results of the inline asm is a vector, it may have the
6013       // wrong width/num elts.  This can happen for register classes that can
6014       // contain multiple different value types.  The preg or vreg allocated may
6015       // not have the same VT as was expected.  Convert it to the right type
6016       // with bit_convert.
6017       if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
6018         Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
6019                           ResultType, Val);
6020
6021       } else if (ResultType != Val.getValueType() &&
6022                  ResultType.isInteger() && Val.getValueType().isInteger()) {
6023         // If a result value was tied to an input value, the computed result may
6024         // have a wider width than the expected result.  Extract the relevant
6025         // portion.
6026         Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
6027       }
6028
6029       assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
6030     }
6031
6032     setValue(CS.getInstruction(), Val);
6033     // Don't need to use this as a chain in this case.
6034     if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
6035       return;
6036   }
6037
6038   std::vector<std::pair<SDValue, Value*> > StoresToEmit;
6039
6040   // Process indirect outputs, first output all of the flagged copies out of
6041   // physregs.
6042   for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
6043     RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
6044     Value *Ptr = IndirectStoresToEmit[i].second;
6045     SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
6046                                              SDNodeOrder, Chain, &Flag);
6047     StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
6048
6049   }
6050
6051   // Emit the non-flagged stores from the physregs.
6052   SmallVector<SDValue, 8> OutChains;
6053   for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
6054     OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
6055                                     StoresToEmit[i].first,
6056                                     getValue(StoresToEmit[i].second),
6057                                     StoresToEmit[i].second, 0));
6058   if (!OutChains.empty())
6059     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
6060                         &OutChains[0], OutChains.size());
6061   DAG.setRoot(Chain);
6062 }
6063
6064 void SelectionDAGBuilder::visitVAStart(CallInst &I) {
6065   DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
6066                           MVT::Other, getRoot(),
6067                           getValue(I.getOperand(1)),
6068                           DAG.getSrcValue(I.getOperand(1))));
6069 }
6070
6071 void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
6072   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
6073                            getRoot(), getValue(I.getOperand(0)),
6074                            DAG.getSrcValue(I.getOperand(0)));
6075   setValue(&I, V);
6076   DAG.setRoot(V.getValue(1));
6077 }
6078
6079 void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
6080   DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
6081                           MVT::Other, getRoot(),
6082                           getValue(I.getOperand(1)),
6083                           DAG.getSrcValue(I.getOperand(1))));
6084 }
6085
6086 void SelectionDAGBuilder::visitVACopy(CallInst &I) {
6087   DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
6088                           MVT::Other, getRoot(),
6089                           getValue(I.getOperand(1)),
6090                           getValue(I.getOperand(2)),
6091                           DAG.getSrcValue(I.getOperand(1)),
6092                           DAG.getSrcValue(I.getOperand(2))));
6093 }
6094
6095 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
6096 /// implementation, which just calls LowerCall.
6097 /// FIXME: When all targets are
6098 /// migrated to using LowerCall, this hook should be integrated into SDISel.
6099 std::pair<SDValue, SDValue>
6100 TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
6101                             bool RetSExt, bool RetZExt, bool isVarArg,
6102                             bool isInreg, unsigned NumFixedArgs,
6103                             CallingConv::ID CallConv, bool isTailCall,
6104                             bool isReturnValueUsed,
6105                             SDValue Callee,
6106                             ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
6107
6108   assert((!isTailCall || PerformTailCallOpt) &&
6109          "isTailCall set when tail-call optimizations are disabled!");
6110
6111   // Handle all of the outgoing arguments.
6112   SmallVector<ISD::OutputArg, 32> Outs;
6113   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
6114     SmallVector<EVT, 4> ValueVTs;
6115     ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
6116     for (unsigned Value = 0, NumValues = ValueVTs.size();
6117          Value != NumValues; ++Value) {
6118       EVT VT = ValueVTs[Value];
6119       const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
6120       SDValue Op = SDValue(Args[i].Node.getNode(),
6121                            Args[i].Node.getResNo() + Value);
6122       ISD::ArgFlagsTy Flags;
6123       unsigned OriginalAlignment =
6124         getTargetData()->getABITypeAlignment(ArgTy);
6125
6126       if (Args[i].isZExt)
6127         Flags.setZExt();
6128       if (Args[i].isSExt)
6129         Flags.setSExt();
6130       if (Args[i].isInReg)
6131         Flags.setInReg();
6132       if (Args[i].isSRet)
6133         Flags.setSRet();
6134       if (Args[i].isByVal) {
6135         Flags.setByVal();
6136         const PointerType *Ty = cast<PointerType>(Args[i].Ty);
6137         const Type *ElementTy = Ty->getElementType();
6138         unsigned FrameAlign = getByValTypeAlignment(ElementTy);
6139         unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
6140         // For ByVal, alignment should come from FE.  BE will guess if this
6141         // info is not there but there are cases it cannot get right.
6142         if (Args[i].Alignment)
6143           FrameAlign = Args[i].Alignment;
6144         Flags.setByValAlign(FrameAlign);
6145         Flags.setByValSize(FrameSize);
6146       }
6147       if (Args[i].isNest)
6148         Flags.setNest();
6149       Flags.setOrigAlign(OriginalAlignment);
6150
6151       EVT PartVT = getRegisterType(RetTy->getContext(), VT);
6152       unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
6153       SmallVector<SDValue, 4> Parts(NumParts);
6154       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
6155
6156       if (Args[i].isSExt)
6157         ExtendKind = ISD::SIGN_EXTEND;
6158       else if (Args[i].isZExt)
6159         ExtendKind = ISD::ZERO_EXTEND;
6160
6161       getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
6162
6163       for (unsigned j = 0; j != NumParts; ++j) {
6164         // if it isn't first piece, alignment must be 1
6165         ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
6166         if (NumParts > 1 && j == 0)
6167           MyFlags.Flags.setSplit();
6168         else if (j != 0)
6169           MyFlags.Flags.setOrigAlign(1);
6170
6171         Outs.push_back(MyFlags);
6172       }
6173     }
6174   }
6175
6176   // Handle the incoming return values from the call.
6177   SmallVector<ISD::InputArg, 32> Ins;
6178   SmallVector<EVT, 4> RetTys;
6179   ComputeValueVTs(*this, RetTy, RetTys);
6180   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
6181     EVT VT = RetTys[I];
6182     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
6183     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
6184     for (unsigned i = 0; i != NumRegs; ++i) {
6185       ISD::InputArg MyFlags;
6186       MyFlags.VT = RegisterVT;
6187       MyFlags.Used = isReturnValueUsed;
6188       if (RetSExt)
6189         MyFlags.Flags.setSExt();
6190       if (RetZExt)
6191         MyFlags.Flags.setZExt();
6192       if (isInreg)
6193         MyFlags.Flags.setInReg();
6194       Ins.push_back(MyFlags);
6195     }
6196   }
6197
6198   // Check if target-dependent constraints permit a tail call here.
6199   // Target-independent constraints should be checked by the caller.
6200   if (isTailCall &&
6201       !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG))
6202     isTailCall = false;
6203
6204   SmallVector<SDValue, 4> InVals;
6205   Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
6206                     Outs, Ins, dl, DAG, InVals);
6207
6208   // Verify that the target's LowerCall behaved as expected.
6209   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
6210          "LowerCall didn't return a valid chain!");
6211   assert((!isTailCall || InVals.empty()) &&
6212          "LowerCall emitted a return value for a tail call!");
6213   assert((isTailCall || InVals.size() == Ins.size()) &&
6214          "LowerCall didn't emit the correct number of values!");
6215   DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6216           assert(InVals[i].getNode() &&
6217                  "LowerCall emitted a null value!");
6218           assert(Ins[i].VT == InVals[i].getValueType() &&
6219                  "LowerCall emitted a value with the wrong type!");
6220         });
6221
6222   // For a tail call, the return value is merely live-out and there aren't
6223   // any nodes in the DAG representing it. Return a special value to
6224   // indicate that a tail call has been emitted and no more Instructions
6225   // should be processed in the current block.
6226   if (isTailCall) {
6227     DAG.setRoot(Chain);
6228     return std::make_pair(SDValue(), SDValue());
6229   }
6230
6231   // Collect the legal value parts into potentially illegal values
6232   // that correspond to the original function's return values.
6233   ISD::NodeType AssertOp = ISD::DELETED_NODE;
6234   if (RetSExt)
6235     AssertOp = ISD::AssertSext;
6236   else if (RetZExt)
6237     AssertOp = ISD::AssertZext;
6238   SmallVector<SDValue, 4> ReturnValues;
6239   unsigned CurReg = 0;
6240   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
6241     EVT VT = RetTys[I];
6242     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
6243     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
6244
6245     SDValue ReturnValue =
6246       getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT,
6247                        AssertOp);
6248     ReturnValues.push_back(ReturnValue);
6249     CurReg += NumRegs;
6250   }
6251
6252   // For a function returning void, there is no return value. We can't create
6253   // such a node, so we just return a null return value in that case. In
6254   // that case, nothing will actualy look at the value.
6255   if (ReturnValues.empty())
6256     return std::make_pair(SDValue(), Chain);
6257
6258   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
6259                             DAG.getVTList(&RetTys[0], RetTys.size()),
6260                             &ReturnValues[0], ReturnValues.size());
6261
6262   return std::make_pair(Res, Chain);
6263 }
6264
6265 void TargetLowering::LowerOperationWrapper(SDNode *N,
6266                                            SmallVectorImpl<SDValue> &Results,
6267                                            SelectionDAG &DAG) {
6268   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
6269   if (Res.getNode())
6270     Results.push_back(Res);
6271 }
6272
6273 SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
6274   llvm_unreachable("LowerOperation not implemented for this target!");
6275   return SDValue();
6276 }
6277
6278
6279 void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
6280   SDValue Op = getValue(V);
6281   assert((Op.getOpcode() != ISD::CopyFromReg ||
6282           cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
6283          "Copy from a reg to the same reg!");
6284   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
6285
6286   RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
6287   SDValue Chain = DAG.getEntryNode();
6288   RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0);
6289   PendingExports.push_back(Chain);
6290 }
6291
6292 #include "llvm/CodeGen/SelectionDAGISel.h"
6293
6294 void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
6295   // If this is the entry block, emit arguments.
6296   Function &F = *LLVMBB->getParent();
6297   SelectionDAG &DAG = SDB->DAG;
6298   SDValue OldRoot = DAG.getRoot();
6299   DebugLoc dl = SDB->getCurDebugLoc();
6300   const TargetData *TD = TLI.getTargetData();
6301   SmallVector<ISD::InputArg, 16> Ins;
6302
6303   // Check whether the function can return without sret-demotion.
6304   SmallVector<EVT, 4> OutVTs;
6305   SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
6306   getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
6307                 OutVTs, OutsFlags, TLI);
6308   FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
6309
6310   FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
6311     OutVTs, OutsFlags, DAG);
6312   if (!FLI.CanLowerReturn) {
6313     // Put in an sret pointer parameter before all the other parameters.
6314     SmallVector<EVT, 1> ValueVTs;
6315     ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
6316
6317     // NOTE: Assuming that a pointer will never break down to more than one VT
6318     // or one register.
6319     ISD::ArgFlagsTy Flags;
6320     Flags.setSRet();
6321     EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
6322     ISD::InputArg RetArg(Flags, RegisterVT, true);
6323     Ins.push_back(RetArg);
6324   }
6325
6326   // Set up the incoming argument description vector.
6327   unsigned Idx = 1;
6328   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
6329        I != E; ++I, ++Idx) {
6330     SmallVector<EVT, 4> ValueVTs;
6331     ComputeValueVTs(TLI, I->getType(), ValueVTs);
6332     bool isArgValueUsed = !I->use_empty();
6333     for (unsigned Value = 0, NumValues = ValueVTs.size();
6334          Value != NumValues; ++Value) {
6335       EVT VT = ValueVTs[Value];
6336       const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
6337       ISD::ArgFlagsTy Flags;
6338       unsigned OriginalAlignment =
6339         TD->getABITypeAlignment(ArgTy);
6340
6341       if (F.paramHasAttr(Idx, Attribute::ZExt))
6342         Flags.setZExt();
6343       if (F.paramHasAttr(Idx, Attribute::SExt))
6344         Flags.setSExt();
6345       if (F.paramHasAttr(Idx, Attribute::InReg))
6346         Flags.setInReg();
6347       if (F.paramHasAttr(Idx, Attribute::StructRet))
6348         Flags.setSRet();
6349       if (F.paramHasAttr(Idx, Attribute::ByVal)) {
6350         Flags.setByVal();
6351         const PointerType *Ty = cast<PointerType>(I->getType());
6352         const Type *ElementTy = Ty->getElementType();
6353         unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
6354         unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
6355         // For ByVal, alignment should be passed from FE.  BE will guess if
6356         // this info is not there but there are cases it cannot get right.
6357         if (F.getParamAlignment(Idx))
6358           FrameAlign = F.getParamAlignment(Idx);
6359         Flags.setByValAlign(FrameAlign);
6360         Flags.setByValSize(FrameSize);
6361       }
6362       if (F.paramHasAttr(Idx, Attribute::Nest))
6363         Flags.setNest();
6364       Flags.setOrigAlign(OriginalAlignment);
6365
6366       EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6367       unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6368       for (unsigned i = 0; i != NumRegs; ++i) {
6369         ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
6370         if (NumRegs > 1 && i == 0)
6371           MyFlags.Flags.setSplit();
6372         // if it isn't first piece, alignment must be 1
6373         else if (i > 0)
6374           MyFlags.Flags.setOrigAlign(1);
6375         Ins.push_back(MyFlags);
6376       }
6377     }
6378   }
6379
6380   // Call the target to set up the argument values.
6381   SmallVector<SDValue, 8> InVals;
6382   SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
6383                                              F.isVarArg(), Ins,
6384                                              dl, DAG, InVals);
6385
6386   // Verify that the target's LowerFormalArguments behaved as expected.
6387   assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
6388          "LowerFormalArguments didn't return a valid chain!");
6389   assert(InVals.size() == Ins.size() &&
6390          "LowerFormalArguments didn't emit the correct number of values!");
6391   DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6392           assert(InVals[i].getNode() &&
6393                  "LowerFormalArguments emitted a null value!");
6394           assert(Ins[i].VT == InVals[i].getValueType() &&
6395                  "LowerFormalArguments emitted a value with the wrong type!");
6396         });
6397
6398   // Update the DAG with the new chain value resulting from argument lowering.
6399   DAG.setRoot(NewRoot);
6400
6401   // Set up the argument values.
6402   unsigned i = 0;
6403   Idx = 1;
6404   if (!FLI.CanLowerReturn) {
6405     // Create a virtual register for the sret pointer, and put in a copy
6406     // from the sret argument into it.
6407     SmallVector<EVT, 1> ValueVTs;
6408     ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
6409     EVT VT = ValueVTs[0];
6410     EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6411     ISD::NodeType AssertOp = ISD::DELETED_NODE;
6412     SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
6413                                         VT, AssertOp);
6414
6415     MachineFunction& MF = SDB->DAG.getMachineFunction();
6416     MachineRegisterInfo& RegInfo = MF.getRegInfo();
6417     unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
6418     FLI.DemoteRegister = SRetReg;
6419     NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue);
6420     DAG.setRoot(NewRoot);
6421
6422     // i indexes lowered arguments.  Bump it past the hidden sret argument.
6423     // Idx indexes LLVM arguments.  Don't touch it.
6424     ++i;
6425   }
6426   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
6427       ++I, ++Idx) {
6428     SmallVector<SDValue, 4> ArgValues;
6429     SmallVector<EVT, 4> ValueVTs;
6430     ComputeValueVTs(TLI, I->getType(), ValueVTs);
6431     unsigned NumValues = ValueVTs.size();
6432     for (unsigned Value = 0; Value != NumValues; ++Value) {
6433       EVT VT = ValueVTs[Value];
6434       EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6435       unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6436
6437       if (!I->use_empty()) {
6438         ISD::NodeType AssertOp = ISD::DELETED_NODE;
6439         if (F.paramHasAttr(Idx, Attribute::SExt))
6440           AssertOp = ISD::AssertSext;
6441         else if (F.paramHasAttr(Idx, Attribute::ZExt))
6442           AssertOp = ISD::AssertZext;
6443
6444         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
6445                                              PartVT, VT, AssertOp));
6446       }
6447       i += NumParts;
6448     }
6449     if (!I->use_empty()) {
6450       SDB->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
6451                                           SDB->getCurDebugLoc()));
6452       // If this argument is live outside of the entry block, insert a copy from
6453       // whereever we got it to the vreg that other BB's will reference it as.
6454       SDB->CopyToExportRegsIfNeeded(I);
6455     }
6456   }
6457   assert(i == InVals.size() && "Argument register count mismatch!");
6458
6459   // Finally, if the target has anything special to do, allow it to do so.
6460   // FIXME: this should insert code into the DAG!
6461   EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
6462 }
6463
6464 /// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
6465 /// ensure constants are generated when needed.  Remember the virtual registers
6466 /// that need to be added to the Machine PHI nodes as input.  We cannot just
6467 /// directly add them, because expansion might result in multiple MBB's for one
6468 /// BB.  As such, the start of the BB might correspond to a different MBB than
6469 /// the end.
6470 ///
6471 void
6472 SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
6473   TerminatorInst *TI = LLVMBB->getTerminator();
6474
6475   SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
6476
6477   // Check successor nodes' PHI nodes that expect a constant to be available
6478   // from this block.
6479   for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
6480     BasicBlock *SuccBB = TI->getSuccessor(succ);
6481     if (!isa<PHINode>(SuccBB->begin())) continue;
6482     MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
6483
6484     // If this terminator has multiple identical successors (common for
6485     // switches), only handle each succ once.
6486     if (!SuccsHandled.insert(SuccMBB)) continue;
6487
6488     MachineBasicBlock::iterator MBBI = SuccMBB->begin();
6489     PHINode *PN;
6490
6491     // At this point we know that there is a 1-1 correspondence between LLVM PHI
6492     // nodes and Machine PHI nodes, but the incoming operands have not been
6493     // emitted yet.
6494     for (BasicBlock::iterator I = SuccBB->begin();
6495          (PN = dyn_cast<PHINode>(I)); ++I) {
6496       // Ignore dead phi's.
6497       if (PN->use_empty()) continue;
6498
6499       unsigned Reg;
6500       Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
6501
6502       if (Constant *C = dyn_cast<Constant>(PHIOp)) {
6503         unsigned &RegOut = SDB->ConstantsOut[C];
6504         if (RegOut == 0) {
6505           RegOut = FuncInfo->CreateRegForValue(C);
6506           SDB->CopyValueToVirtualRegister(C, RegOut);
6507         }
6508         Reg = RegOut;
6509       } else {
6510         Reg = FuncInfo->ValueMap[PHIOp];
6511         if (Reg == 0) {
6512           assert(isa<AllocaInst>(PHIOp) &&
6513                  FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
6514                  "Didn't codegen value into a register!??");
6515           Reg = FuncInfo->CreateRegForValue(PHIOp);
6516           SDB->CopyValueToVirtualRegister(PHIOp, Reg);
6517         }
6518       }
6519
6520       // Remember that this register needs to added to the machine PHI node as
6521       // the input for this MBB.
6522       SmallVector<EVT, 4> ValueVTs;
6523       ComputeValueVTs(TLI, PN->getType(), ValueVTs);
6524       for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
6525         EVT VT = ValueVTs[vti];
6526         unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6527         for (unsigned i = 0, e = NumRegisters; i != e; ++i)
6528           SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
6529         Reg += NumRegisters;
6530       }
6531     }
6532   }
6533   SDB->ConstantsOut.clear();
6534 }
6535
6536 /// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
6537 /// supports legal types, and it emits MachineInstrs directly instead of
6538 /// creating SelectionDAG nodes.
6539 ///
6540 bool
6541 SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
6542                                                       FastISel *F) {
6543   TerminatorInst *TI = LLVMBB->getTerminator();
6544
6545   SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
6546   unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
6547
6548   // Check successor nodes' PHI nodes that expect a constant to be available
6549   // from this block.
6550   for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
6551     BasicBlock *SuccBB = TI->getSuccessor(succ);
6552     if (!isa<PHINode>(SuccBB->begin())) continue;
6553     MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
6554
6555     // If this terminator has multiple identical successors (common for
6556     // switches), only handle each succ once.
6557     if (!SuccsHandled.insert(SuccMBB)) continue;
6558
6559     MachineBasicBlock::iterator MBBI = SuccMBB->begin();
6560     PHINode *PN;
6561
6562     // At this point we know that there is a 1-1 correspondence between LLVM PHI
6563     // nodes and Machine PHI nodes, but the incoming operands have not been
6564     // emitted yet.
6565     for (BasicBlock::iterator I = SuccBB->begin();
6566          (PN = dyn_cast<PHINode>(I)); ++I) {
6567       // Ignore dead phi's.
6568       if (PN->use_empty()) continue;
6569
6570       // Only handle legal types. Two interesting things to note here. First,
6571       // by bailing out early, we may leave behind some dead instructions,
6572       // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
6573       // own moves. Second, this check is necessary becuase FastISel doesn't
6574       // use CreateRegForValue to create registers, so it always creates
6575       // exactly one register for each non-void instruction.
6576       EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
6577       if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
6578         // Promote MVT::i1.
6579         if (VT == MVT::i1)
6580           VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
6581         else {
6582           SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
6583           return false;
6584         }
6585       }
6586
6587       Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
6588
6589       unsigned Reg = F->getRegForValue(PHIOp);
6590       if (Reg == 0) {
6591         SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
6592         return false;
6593       }
6594       SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
6595     }
6596   }
6597
6598   return true;
6599 }