[WinEH] Add some support for code generating catchpad

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.h
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index a16cf4a0b64ebcdcfd48923cbf286e0f6caf8b73..69bb0e3b0e4165cf907b4cc65166f892ef266194 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -26,41 +26,37 @@ namespace llvm {
  
    namespace X86ISD {
      // X86 Specific DAG Nodes
-    enum NodeType {
+    enum NodeType : unsigned {
        // Start the numbering where the builtin ops leave off.
        FIRST_NUMBER = ISD::BUILTIN_OP_END,
  
-      /// BSF - Bit scan forward.
-      /// BSR - Bit scan reverse.
+      /// Bit scan forward.
        BSF,
+      /// Bit scan reverse.
        BSR,
  
-      /// SHLD, SHRD - Double shift instructions. These correspond to
+      /// Double shift instructions. These correspond to
        /// X86::SHLDxx and X86::SHRDxx instructions.
        SHLD,
        SHRD,
  
-      /// FAND - Bitwise logical AND of floating point values. This corresponds
+      /// Bitwise logical AND of floating point values. This corresponds
        /// to X86::ANDPS or X86::ANDPD.
        FAND,
  
-      /// FOR - Bitwise logical OR of floating point values. This corresponds
+      /// Bitwise logical OR of floating point values. This corresponds
        /// to X86::ORPS or X86::ORPD.
        FOR,
  
-      /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+      /// Bitwise logical XOR of floating point values. This corresponds
        /// to X86::XORPS or X86::XORPD.
        FXOR,
  
-      /// FANDN - Bitwise logical ANDNOT of floating point values. This
+      /// Bitwise logical ANDNOT of floating point values. This
        /// corresponds to X86::ANDNPS or X86::ANDNPD.
        FANDN,
  
-      /// FSRL - Bitwise logical right shift of floating point values. These
-      /// corresponds to X86::PSRLDQ.
-      FSRL,
-
-      /// CALL - These operations represent an abstract X86 call
+      /// These operations represent an abstract X86 call
        /// instruction, which includes a bunch of information.  In particular the
        /// operands of these node are:
        ///
@@ -79,8 +75,7 @@ namespace llvm {
        ///
        CALL,
  
-      /// RDTSC_DAG - This operation implements the lowering for
-      /// readcyclecounter
+      /// This operation implements the lowering for readcyclecounter
        RDTSC_DAG,
  
        /// X86 Read Time-Stamp Counter and Processor ID.
@@ -131,189 +126,225 @@ namespace llvm {
        /// 1 is the number of bytes of stack to pop.
        RET_FLAG,
  
-      /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+      /// Repeat fill, corresponds to X86::REP_STOSx.
        REP_STOS,
  
-      /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+      /// Repeat move, corresponds to X86::REP_MOVSx.
        REP_MOVS,
  
-      /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+      /// On Darwin, this node represents the result of the popl
        /// at function entry, used for PIC code.
        GlobalBaseReg,
  
-      /// Wrapper - A wrapper node for TargetConstantPool,
+      /// A wrapper node for TargetConstantPool,
        /// TargetExternalSymbol, and TargetGlobalAddress.
        Wrapper,
  
-      /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+      /// Special wrapper used under X86-64 PIC mode for RIP
        /// relative displacements.
        WrapperRIP,
  
-      /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+      /// Copies a 64-bit value from the low word of an XMM vector
        /// to an MMX vector.  If you think this is too close to the previous
        /// mnemonic, so do I; blame Intel.
        MOVDQ2Q,
  
-      /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+      /// Copies a 32-bit value from the low word of a MMX
        /// vector to a GPR.
        MMX_MOVD2W,
  
-      /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+      /// Copies a GPR into the low 32-bit word of a MMX vector
+      /// and zero out the high word.
+      MMX_MOVW2D,
+
+      /// Extract an 8-bit value from a vector and zero extend it to
        /// i32, corresponds to X86::PEXTRB.
        PEXTRB,
  
-      /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+      /// Extract a 16-bit value from a vector and zero extend it to
        /// i32, corresponds to X86::PEXTRW.
        PEXTRW,
  
-      /// INSERTPS - Insert any element of a 4 x float vector into any element
+      /// Insert any element of a 4 x float vector into any element
        /// of a destination 4 x floatvector.
        INSERTPS,
  
-      /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+      /// Insert the lower 8-bits of a 32-bit value to a vector,
        /// corresponds to X86::PINSRB.
        PINSRB,
  
-      /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+      /// Insert the lower 16-bits of a 32-bit value to a vector,
        /// corresponds to X86::PINSRW.
        PINSRW, MMX_PINSRW,
  
-      /// PSHUFB - Shuffle 16 8-bit values within a vector.
+      /// Shuffle 16 8-bit values within a vector.
        PSHUFB,
  
-      /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+      /// Compute Sum of Absolute Differences.
+      PSADBW,
+
+      /// Bitwise Logical AND NOT of Packed FP values.
        ANDNP,
  
-      /// PSIGN - Copy integer sign.
+      /// Copy integer sign.
        PSIGN,
  
-      /// BLENDV - Blend where the selector is a register.
-      BLENDV,
-
-      /// BLENDI - Blend where the selector is an immediate.
+      /// Blend where the selector is an immediate.
        BLENDI,
  
-      /// ADDSUB - Combined add and sub on an FP vector.
+      /// Blend where the condition has been shrunk.
+      /// This is used to emphasize that the condition mask is
+      /// no longer valid for generic VSELECT optimizations.
+      SHRUNKBLEND,
+
+      /// Combined add and sub on an FP vector.
        ADDSUB,
  
-      // SUBUS - Integer sub with unsigned saturation.
+      //  FP vector ops with rounding mode.
+      FADD_RND,
+      FSUB_RND,
+      FMUL_RND,
+      FDIV_RND,
+      FMAX_RND,
+      FMIN_RND,
+      FSQRT_RND,
+
+      // FP vector get exponent 
+      FGETEXP_RND,
+      // FP Scale
+      SCALEF,
+      // Integer add/sub with unsigned saturation.
+      ADDUS,
        SUBUS,
-
-      /// HADD - Integer horizontal add.
+      // Integer add/sub with signed saturation.
+      ADDS,
+      SUBS,
+      // Unsigned Integer average 
+      AVG,
+      /// Integer horizontal add.
        HADD,
  
-      /// HSUB - Integer horizontal sub.
+      /// Integer horizontal sub.
        HSUB,
  
-      /// FHADD - Floating point horizontal add.
+      /// Floating point horizontal add.
        FHADD,
  
-      /// FHSUB - Floating point horizontal sub.
+      /// Floating point horizontal sub.
        FHSUB,
  
-      /// UMAX, UMIN - Unsigned integer max and min.
-      UMAX, UMIN,
-
-      /// SMAX, SMIN - Signed integer max and min.
-      SMAX, SMIN,
+      // Integer absolute value
+      ABS,
  
-      /// FMAX, FMIN - Floating point max and min.
-      ///
+      /// Floating point max and min.
        FMAX, FMIN,
  
-      /// FMAXC, FMINC - Commutative FMIN and FMAX.
+      /// Commutative FMIN and FMAX.
        FMAXC, FMINC,
  
-      /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
-      /// approximation.  Note that these typically require refinement
+      /// Floating point reciprocal-sqrt and reciprocal approximation.
+      /// Note that these typically require refinement
        /// in order to obtain suitable precision.
        FRSQRT, FRCP,
  
-      // TLSADDR - Thread Local Storage.
+      // Thread Local Storage.
        TLSADDR,
  
-      // TLSBASEADDR - Thread Local Storage. A call to get the start address
+      // Thread Local Storage. A call to get the start address
        // of the TLS block for the current module.
        TLSBASEADDR,
  
-      // TLSCALL - Thread Local Storage.  When calling to an OS provided
+      // Thread Local Storage.  When calling to an OS provided
        // thunk at the address from an earlier relocation.
        TLSCALL,
  
-      // EH_RETURN - Exception Handling helpers.
+      // Exception Handling helpers.
        EH_RETURN,
  
-      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+      CATCHRET,
+
+      // SjLj exception handling setjmp.
        EH_SJLJ_SETJMP,
  
-      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+      // SjLj exception handling longjmp.
        EH_SJLJ_LONGJMP,
  
-      /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+      /// Tail call return. See X86TargetLowering::LowerCall for
        /// the list of operands.
        TC_RETURN,
  
-      // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
+      // Vector move to low scalar and zero higher vector elements.
        VZEXT_MOVL,
  
-      // VZEXT - Vector integer zero-extend.
+      // Vector integer zero-extend.
        VZEXT,
  
-      // VSEXT - Vector integer signed-extend.
+      // Vector integer signed-extend.
        VSEXT,
  
-      // VTRUNC - Vector integer truncate.
+      // Vector integer truncate.
        VTRUNC,
+      // Vector integer truncate with unsigned/signed saturation.
+      VTRUNCUS, VTRUNCS,
  
-      // VTRUNC - Vector integer truncate with mask.
-      VTRUNCM,
-
-      // VFPEXT - Vector FP extend.
+      // Vector FP extend.
        VFPEXT,
  
-      // VFPROUND - Vector FP round.
+      // Vector FP round.
        VFPROUND,
  
-      // VSHL, VSRL - 128-bit vector logical left / right shift
+      // Vector signed/unsigned integer to double.
+      CVTDQ2PD, CVTUDQ2PD,
+
+      // 128-bit vector logical left / right shift
        VSHLDQ, VSRLDQ,
  
-      // VSHL, VSRL, VSRA - Vector shift elements
+      // Vector shift elements
        VSHL, VSRL, VSRA,
  
-      // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+      // Vector shift elements by immediate
        VSHLI, VSRLI, VSRAI,
  
-      // CMPP - Vector packed double/float comparison.
+      // Vector packed double/float comparison.
        CMPP,
  
-      // PCMP* - Vector integer comparisons.
+      // Vector integer comparisons.
        PCMPEQ, PCMPGT,
-      // PCMP*M - Vector integer comparisons, the result is in a mask vector.
+      // Vector integer comparisons, the result is in a mask vector.
        PCMPEQM, PCMPGTM,
  
-      /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
+      /// Vector comparison generating mask bits for fp and
        /// integer signed and unsigned data types.
        CMPM,
        CMPMU,
+      // Vector comparison with rounding mode for FP values
+      CMPM_RND,
  
-      // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+      // Arithmetic operations with FLAGS results.
        ADD, SUB, ADC, SBB, SMUL,
        INC, DEC, OR, XOR, AND,
  
-      BEXTR,  // BEXTR - Bit field extract
+      BEXTR,  // Bit field extract
  
        UMUL, // LOW, HI, FLAGS = umul LHS, RHS
  
-      // MUL_IMM - X86 specific multiply by immediate.
+      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
+      SMUL8, UMUL8,
+
+      // 8-bit divrem that zero- or sign-extends the high result (AH).
+      UDIVREM8_ZEXT_HREG,
+      SDIVREM8_SEXT_HREG,
+
+      // X86-specific multiply by immediate.
        MUL_IMM,
  
-      // PTEST - Vector bitwise comparisons.
+      // Vector bitwise comparisons.
        PTEST,
  
-      // TESTP - Vector packed fp sign bitwise comparisons.
+      // Vector packed fp sign bitwise comparisons.
        TESTP,
  
-      // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
+      // Vector "test" in AVX-512, the result is in a mask vector.
        TESTM,
        TESTNM,
  
@@ -331,6 +362,8 @@ namespace llvm {
        PSHUFHW,
        PSHUFLW,
        SHUFP,
+      // Shuffle Packed Values at 128-bit granularity
+      SHUF128,
        MOVDDUP,
        MOVSHDUP,
        MOVSLDUP,
@@ -350,18 +383,33 @@ namespace llvm {
        VPERMIV3,
        VPERMI,
        VPERM2X128,
+      // Fix Up Special Packed Float32/64 values
+      VFIXUPIMM,
+      // Range Restriction Calculation For Packed Pairs of Float32/64 values
+      VRANGE,
+      // Reduce - Perform Reduction Transformation on scalar/packed FP
+      VREDUCE,
+      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits
+      VRNDSCALE,
+      // Broadcast scalar to vector
        VBROADCAST,
-      // masked broadcast
-      VBROADCASTM,
+      // Broadcast subvector to vector
+      SUBV_BROADCAST,
        // Insert/Extract vector element
        VINSERT,
        VEXTRACT,
  
-      // PMULUDQ - Vector multiply packed unsigned doubleword integers
+      /// SSE4A Extraction and Insertion.
+      EXTRQI, INSERTQI,
+
+      // Vector multiply packed unsigned doubleword integers
        PMULUDQ,
-      // PMULUDQ - Vector multiply packed signed doubleword integers
+      // Vector multiply packed signed doubleword integers
        PMULDQ,
-
+      // Vector Multiply Packed Unsigned Integers with Round and Scale
+      MULHRS,
+      // Multiply and Add Packed Integers
+      VPMADDUBSW, VPMADDWD,
        // FMA nodes
        FMADD,
        FNMADD,
@@ -369,61 +417,77 @@ namespace llvm {
        FNMSUB,
        FMADDSUB,
        FMSUBADD,
-
-      // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
-      // according to %al. An operator is needed so that this can be expanded
-      // with control flow.
+      // FMA with rounding mode
+      FMADD_RND,
+      FNMADD_RND,
+      FMSUB_RND,
+      FNMSUB_RND,
+      FMADDSUB_RND,
+      FMSUBADD_RND,
+
+      // Compress and expand
+      COMPRESS,
+      EXPAND,
+
+      // Convert Signed/Unsigned Integer to Scalar Floating-Point Value
+      // with rounding mode
+      SINT_TO_FP_RND,
+      UINT_TO_FP_RND,
+
+      // Vector float/double to signed/unsigned integer.
+      FP_TO_SINT_RND, FP_TO_UINT_RND,
+      // Save xmm argument registers to the stack, according to %al. An operator
+      // is needed so that this can be expanded with control flow.
        VASTART_SAVE_XMM_REGS,
  
-      // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+      // Windows's _chkstk call to do stack probing.
        WIN_ALLOCA,
  
-      // SEG_ALLOCA - For allocating variable amounts of stack space when using
+      // For allocating variable amounts of stack space when using
        // segmented stacks. Check if the current stacklet has enough space, and
        // falls back to heap allocation if not.
        SEG_ALLOCA,
  
-      // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
-      WIN_FTOL,
-
        // Memory barrier
        MEMBARRIER,
        MFENCE,
        SFENCE,
        LFENCE,
  
-      // FNSTSW16r - Store FP status word into i16 register.
+      // Store FP status word into i16 register.
        FNSTSW16r,
  
-      // SAHF - Store contents of %ah into %eflags.
+      // Store contents of %ah into %eflags.
        SAHF,
  
-      // RDRAND - Get a random integer and indicate whether it is valid in CF.
+      // Get a random integer and indicate whether it is valid in CF.
        RDRAND,
  
-      // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+      // Get a NIST SP800-90B & C compliant random integer and
        // indicate whether it is valid in CF.
        RDSEED,
  
-      // PCMP*STRI
        PCMPISTRI,
        PCMPESTRI,
  
-      // XTEST - Test if in transactional execution.
+      // Test if in transactional execution.
        XTEST,
  
-      // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
+      // ERI instructions
+      RSQRT28, RCP28, EXP2,
+
+      // Compare and swap.
        LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
        LCMPXCHG8_DAG,
        LCMPXCHG16_DAG,
  
-      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      // Load, scalar_to_vector, and zero extend.
        VZEXT_LOAD,
  
-      // FNSTCW16m - Store FP control world into i16 memory.
+      // Store FP control word into i16 memory.
        FNSTCW16m,
  
-      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+      /// This instruction implements FP_TO_SINT with the
        /// integer destination in memory and a FP reg source.  This corresponds
        /// to the X86::FIST*m instructions and the rounding mode change stuff. It
        /// has two inputs (token chain and address) and two outputs (int value
@@ -432,7 +496,7 @@ namespace llvm {
        FP_TO_INT32_IN_MEM,
        FP_TO_INT64_IN_MEM,
  
-      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+      /// This instruction implements SINT_TO_FP with the
        /// integer source in memory and FP reg result.  This corresponds to the
        /// X86::FILD*m instructions. It has three inputs (token chain, address,
        /// and source type) and two outputs (FP value and token chain). FILD_FLAG
@@ -440,19 +504,19 @@ namespace llvm {
        FILD,
        FILD_FLAG,
  
-      /// FLD - This instruction implements an extending load to FP stack slots.
+      /// This instruction implements an extending load to FP stack slots.
        /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
        /// operand, ptr to load from, and a ValueType node indicating the type
        /// to load to.
        FLD,
  
-      /// FST - This instruction implements a truncating store to FP stack
+      /// This instruction implements a truncating store to FP stack
        /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
        /// chain operand, value to store, address, and a ValueType to store it
        /// as.
        FST,
  
-      /// VAARG_64 - This instruction grabs the address of the next argument
+      /// This instruction grabs the address of the next argument
        /// from a va_list. (reads and modifies the va_list in memory)
        VAARG_64
  
@@ -464,57 +528,56 @@ namespace llvm {
  
    /// Define some predicates that are used for node matching.
    namespace X86 {
-    /// isVEXTRACT128Index - Return true if the specified
+    /// Return true if the specified
      /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
      /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
      bool isVEXTRACT128Index(SDNode *N);
  
-    /// isVINSERT128Index - Return true if the specified
+    /// Return true if the specified
      /// INSERT_SUBVECTOR operand specifies a subvector insert that is
      /// suitable for input to VINSERTF128, VINSERTI128 instructions.
      bool isVINSERT128Index(SDNode *N);
  
-    /// isVEXTRACT256Index - Return true if the specified
+    /// Return true if the specified
      /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
      /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
      bool isVEXTRACT256Index(SDNode *N);
  
-    /// isVINSERT256Index - Return true if the specified
+    /// Return true if the specified
      /// INSERT_SUBVECTOR operand specifies a subvector insert that is
      /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
      bool isVINSERT256Index(SDNode *N);
  
-    /// getExtractVEXTRACT128Immediate - Return the appropriate
+    /// Return the appropriate
      /// immediate to extract the specified EXTRACT_SUBVECTOR index
      /// with VEXTRACTF128, VEXTRACTI128 instructions.
      unsigned getExtractVEXTRACT128Immediate(SDNode *N);
  
-    /// getInsertVINSERT128Immediate - Return the appropriate
+    /// Return the appropriate
      /// immediate to insert at the specified INSERT_SUBVECTOR index
      /// with VINSERTF128, VINSERT128 instructions.
      unsigned getInsertVINSERT128Immediate(SDNode *N);
  
-    /// getExtractVEXTRACT256Immediate - Return the appropriate
+    /// Return the appropriate
      /// immediate to extract the specified EXTRACT_SUBVECTOR index
      /// with VEXTRACTF64X4, VEXTRACTI64x4 instructions.
      unsigned getExtractVEXTRACT256Immediate(SDNode *N);
  
-    /// getInsertVINSERT256Immediate - Return the appropriate
+    /// Return the appropriate
      /// immediate to insert at the specified INSERT_SUBVECTOR index
      /// with VINSERTF64x4, VINSERTI64x4 instructions.
      unsigned getInsertVINSERT256Immediate(SDNode *N);
  
-    /// isZeroNode - Returns true if Elt is a constant zero or a floating point
-    /// constant +0.0.
+    /// Returns true if Elt is a constant zero or floating point constant +0.0.
      bool isZeroNode(SDValue Elt);
  
-    /// isOffsetSuitableForCodeModel - Returns true of the given offset can be
+    /// Returns true if the given offset can be
      /// fit into displacement field of the instruction.
      bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                        bool hasSymbolicDisplacement = true);
  
  
-    /// isCalleePop - Determines whether the callee is required to pop its
+    /// Determines whether the callee is required to pop its
      /// own arguments. Callee pop is necessary to support tail calls.
      bool isCalleePop(CallingConv::ID CallingConv,
                       bool is64Bit, bool IsVarArg, bool TailCallOpt);
@@ -531,35 +594,39 @@ namespace llvm {
    }
  
    //===--------------------------------------------------------------------===//
-  //  X86TargetLowering - X86 Implementation of the TargetLowering interface
+  //  X86 Implementation of the TargetLowering interface
    class X86TargetLowering final : public TargetLowering {
    public:
-    explicit X86TargetLowering(X86TargetMachine &TM);
+    explicit X86TargetLowering(const X86TargetMachine &TM,
+                               const X86Subtarget &STI);
  
      unsigned getJumpTableEncoding() const override;
+    bool useSoftFloat() const override;
  
-    MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
+    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+      return MVT::i8;
+    }
  
      const MCExpr *
      LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                const MachineBasicBlock *MBB, unsigned uid,
                                MCContext &Ctx) const override;
  
-    /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
-    /// jumptable.
+    /// Returns relocation base for the given PIC jumptable.
      SDValue getPICJumpTableRelocBase(SDValue Table,
                                       SelectionDAG &DAG) const override;
      const MCExpr *
      getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                   unsigned JTI, MCContext &Ctx) const override;
  
-    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+    /// Return the desired alignment for ByVal aggregate
      /// function arguments in the caller parameter area. For X86, aggregates
      /// that contains are placed at 16-byte boundaries while the rest are at
      /// 4-byte boundaries.
-    unsigned getByValTypeAlignment(Type *Ty) const override;
+    unsigned getByValTypeAlignment(Type *Ty,
+                                   const DataLayout &DL) const override;
  
-    /// getOptimalMemOpType - Returns the target specific optimal type for load
+    /// Returns the target specific optimal type for load
      /// and store operations as a result of memset, memcpy, and memmove
      /// lowering. If DstAlign is zero that means it's safe to destination
      /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
@@ -574,7 +641,7 @@ namespace llvm {
                              bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                              MachineFunction &MF) const override;
  
-    /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+    /// Returns true if it's safe to use load / store of the
      /// specified type to expand memcpy / memset inline. This is mostly true
      /// for all types except for some special cases. For example, on X86
      /// targets without SSE2 f64 load / store are done with fldl / fstpl which
@@ -582,17 +649,16 @@ namespace llvm {
      /// legal as the hook is used before type legalization.
      bool isSafeMemOpType(MVT VT) const override;
  
-    /// allowsMisalignedMemoryAccesses - Returns true if the target allows
-    /// unaligned memory accesses. of the specified type. Returns whether it
-    /// is "fast" by reference in the second argument.
+    /// Returns true if the target allows unaligned memory accesses of the
+    /// specified type. Returns whether it is "fast" in the last argument.
      bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                         bool *Fast) const override;
  
-    /// LowerOperation - Provide custom lowering hooks for some operations.
+    /// Provide custom lowering hooks for some operations.
      ///
      SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  
-    /// ReplaceNodeResults - Replace the results of node with an illegal result
+    /// Replace the results of node with an illegal result
      /// type with new values built out of custom code.
      ///
      void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
@@ -601,13 +667,13 @@ namespace llvm {
  
      SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  
-    /// isTypeDesirableForOp - Return true if the target has native support for
+    /// Return true if the target has native support for
      /// the specified value type and it is 'desirable' to use the type for the
      /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
      /// instruction encodings are longer and some i16 instructions are slow.
      bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
  
-    /// isTypeDesirable - Return true if the target has native support for the
+    /// Return true if the target has native support for the
      /// specified value type and it is 'desirable' to use the type. e.g. On x86
      /// i16 is legal, but undesirable since i16 instruction encodings are longer
      /// and some i16 instructions are slow.
@@ -618,24 +684,26 @@ namespace llvm {
                                    MachineBasicBlock *MBB) const override;
  
  
-    /// getTargetNodeName - This method returns the name of a target specific
-    /// DAG node.
+    /// This method returns the name of a target specific DAG node.
      const char *getTargetNodeName(unsigned Opcode) const override;
  
-    /// getSetCCResultType - Return the value type to use for ISD::SETCC.
-    EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+    bool isCheapToSpeculateCttz() const override;
  
-    /// computeKnownBitsForTargetNode - Determine which of the bits specified
-    /// in Mask are known to be either zero or one and return them in the
-    /// KnownZero/KnownOne bitsets.
+    bool isCheapToSpeculateCtlz() const override;
+
+    /// Return the value type to use for ISD::SETCC.
+    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+                           EVT VT) const override;
+
+    /// Determine which of the bits specified in Mask are known to be either
+    /// zero or one and return them in the KnownZero/KnownOne bitsets.
      void computeKnownBitsForTargetNode(const SDValue Op,
                                         APInt &KnownZero,
                                         APInt &KnownOne,
                                         const SelectionDAG &DAG,
                                         unsigned Depth = 0) const override;
  
-    // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
-    // operation that are sign bits.
+    /// Determine the number of bits in the operation that are sign bits.
      unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const override;
@@ -647,8 +715,7 @@ namespace llvm {
  
      bool ExpandInlineAsm(CallInst *CI) const override;
  
-    ConstraintType
-      getConstraintType(const std::string &Constraint) const override;
+    ConstraintType getConstraintType(StringRef Constraint) const override;
  
      /// Examine constraint string and operand type and determine a weight value.
      /// The operand object must already have been set up with the operand type.
@@ -658,34 +725,47 @@ namespace llvm {
  
      const char *LowerXConstraint(EVT ConstraintVT) const override;
  
-    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-    /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
-    /// true it means one of the asm constraint of the inline asm instruction
-    /// being processed is 'm'.
+    /// Lower the specified operand into the Ops vector. If it is invalid, don't
+    /// add anything to Ops. If hasMemory is true it means one of the asm
+    /// constraints of the inline asm instruction being processed is 'm'.
      void LowerAsmOperandForConstraint(SDValue Op,
                                        std::string &Constraint,
                                        std::vector<SDValue> &Ops,
                                        SelectionDAG &DAG) const override;
  
-    /// getRegForInlineAsmConstraint - Given a physical register constraint
+    unsigned
+    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+      if (ConstraintCode == "i")
+        return InlineAsm::Constraint_i;
+      else if (ConstraintCode == "o")
+        return InlineAsm::Constraint_o;
+      else if (ConstraintCode == "v")
+        return InlineAsm::Constraint_v;
+      else if (ConstraintCode == "X")
+        return InlineAsm::Constraint_X;
+      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+    }
+
+    /// Given a physical register constraint
      /// (e.g. {edx}), return the register number and the register class for the
      /// register.  This should only be used for C_Register constraints.  On
      /// error, this returns a register number of 0.
-    std::pair<unsigned, const TargetRegisterClass*>
-      getRegForInlineAsmConstraint(const std::string &Constraint,
-                                   MVT VT) const override;
+    std::pair<unsigned, const TargetRegisterClass *>
+    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                 StringRef Constraint, MVT VT) const override;
  
-    /// isLegalAddressingMode - Return true if the addressing mode represented
+    /// Return true if the addressing mode represented
      /// by AM is legal for this target, for a load/store of the specified type.
-    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+                               Type *Ty, unsigned AS) const override;
  
-    /// isLegalICmpImmediate - Return true if the specified immediate is legal
+    /// Return true if the specified immediate is legal
      /// icmp immediate, that is the target has icmp instructions which can
      /// compare a register against the immediate without having to materialize
      /// the immediate into a register.
      bool isLegalICmpImmediate(int64_t Imm) const override;
  
-    /// isLegalAddImmediate - Return true if the specified immediate is legal
+    /// Return true if the specified immediate is legal
      /// add immediate, that is the target has add instructions which can
      /// add a register and the immediate without having to materialize
      /// the immediate into a register.
@@ -696,11 +776,12 @@ namespace llvm {
      /// of the specified type.
      /// If the AM is supported, the return value must be >= 0.
      /// If the AM is not supported, it returns a negative value.
-    int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
+    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+                             unsigned AS) const override;
  
      bool isVectorShiftByScalarCheap(Type *Ty) const override;
  
-    /// isTruncateFree - Return true if it's free to truncate a value of
+    /// Return true if it's free to truncate a value of
      /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
      /// register EAX to i16 by referencing its sub-register AX.
      bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
@@ -708,7 +789,7 @@ namespace llvm {
  
      bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
  
-    /// isZExtFree - Return true if any actual instruction that defines a
+    /// Return true if any actual instruction that defines a
      /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
      /// register. This does not necessarily include registers defined in
      /// unknown ways, such as incoming arguments, or copies from unknown
@@ -720,37 +801,39 @@ namespace llvm {
      bool isZExtFree(EVT VT1, EVT VT2) const override;
      bool isZExtFree(SDValue Val, EVT VT2) const override;
  
-    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
-    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
-    /// expanded to FMAs when this method returns true, otherwise fmuladd is
-    /// expanded to fmul + fadd.
+    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
+    /// extend node) is profitable.
+    bool isVectorLoadExtDesirable(SDValue) const override;
+
+    /// Return true if an FMA operation is faster than a pair of fmul and fadd
+    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
      bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
  
-    /// isNarrowingProfitable - Return true if it's profitable to narrow
+    /// Return true if it's profitable to narrow
      /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
      /// from i32 to i8 but not from i32 to i16.
      bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
  
-    /// isFPImmLegal - Returns true if the target can instruction select the
+    /// Returns true if the target can instruction select the
      /// specified FP immediate natively. If false, the legalizer will
      /// materialize the FP immediate as a load from a constant pool.
      bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
  
-    /// isShuffleMaskLegal - Targets can use this to indicate that they only
-    /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
-    /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
-    /// values are assumed to be legal.
+    /// Targets can use this to indicate that they only support *some*
+    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
+    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
+    /// be legal.
      bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
                              EVT VT) const override;
  
-    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
-    /// used by Targets can use this to indicate if there is a suitable
-    /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
-    /// pool entry.
+    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
+    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
+    /// with a constant pool entry.
      bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                  EVT VT) const override;
  
-    /// ShouldShrinkFPConstant - If true, then instruction selection should
+    /// If true, then instruction selection should
      /// seek to shrink the FP constant of the specified type to a smaller type
      /// in order to save space and / or reduce runtime.
      bool ShouldShrinkFPConstant(EVT VT) const override {
@@ -760,48 +843,43 @@ namespace llvm {
        return !X86ScalarSSEf64 || VT == MVT::f80;
      }
  
-    const X86Subtarget* getSubtarget() const {
-      return Subtarget;
-    }
+    /// Return true if we believe it is correct and profitable to reduce the
+    /// load node to a smaller type.
+    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+                               EVT NewVT) const override;
  
-    /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
-    /// computed in an SSE register, not on the X87 floating point stack.
+    /// Return true if the specified scalar FP type is computed in an SSE
+    /// register, not on the X87 floating point stack.
      bool isScalarFPTypeInSSEReg(EVT VT) const {
        return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
        (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
      }
  
-    /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
-    /// for fptoui.
-    bool isTargetFTOL() const;
-
-    /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
-    /// used for fptoui to the given type.
-    bool isIntegerTypeFTOL(EVT VT) const {
-      return isTargetFTOL() && VT == MVT::i64;
-    }
-
      /// \brief Returns true if it is beneficial to convert a load of a constant
      /// to just the constant itself.
      bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                             Type *Ty) const override;
  
+    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
+    /// with this index.
+    bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
+
      /// Intel processors have a unified instruction and data cache
      const char * getClearCacheBuiltinName() const override {
        return nullptr; // nothing to do, move along.
      }
  
-    unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+    unsigned getRegisterByName(const char* RegName, EVT VT,
+                               SelectionDAG &DAG) const override;
  
-    /// createFastISel - This method returns a target specific FastISel object,
+    /// This method returns a target specific FastISel object,
      /// or null if the target does not support "fast" ISel.
      FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                               const TargetLibraryInfo *libInfo) const override;
  
-    /// getStackCookieLocation - Return true if the target stores stack
-    /// protector cookies at a fixed offset in some non-standard address
-    /// space, and populates the address space and offset as
-    /// appropriate.
+    /// Return true if the target stores stack protector cookies at a fixed
+    /// offset in some non-standard address space, and populates the address
+    /// space and offset as appropriate.
      bool getStackCookieLocation(unsigned &AddressSpace,
                                  unsigned &Offset) const override;
  
@@ -810,38 +888,33 @@ namespace llvm {
  
      bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
  
-    /// \brief Reset the operation actions based on target options.
-    void resetOperationActions() override;
-
      bool useLoadStackGuardNode() const override;
      /// \brief Customize the preferred legalization strategy for certain types.
      LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
  
+    bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+
    protected:
-    std::pair<const TargetRegisterClass*, uint8_t>
-    findRepresentativeClass(MVT VT) const override;
+    std::pair<const TargetRegisterClass *, uint8_t>
+    findRepresentativeClass(const TargetRegisterInfo *TRI,
+                            MVT VT) const override;
  
    private:
-    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+    /// Keep a pointer to the X86Subtarget around so that we can
      /// make the right decision when generating code for different targets.
      const X86Subtarget *Subtarget;
      const DataLayout *TD;
  
-    /// Used to store the TargetOptions so that we don't waste time resetting
-    /// the operation actions unless we have to.
-    TargetOptions TO;
-
-    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
-    /// floating point ops.
+    /// Select between SSE or x87 floating point ops.
      /// When SSE is available, use it for f32 operations.
      /// When SSE2 is available, use it for f64 operations.
      bool X86ScalarSSEf32;
      bool X86ScalarSSEf64;
  
-    /// LegalFPImmediates - A list of legal fp immediates.
+    /// A list of legal FP immediates.
      std::vector<APFloat> LegalFPImmediates;
  
-    /// addLegalFPImmediate - Indicate that this x86 target can instruction
+    /// Indicate that this x86 target can instruction
      /// select the specified FP immediate natively.
      void addLegalFPImmediate(const APFloat& Imm) {
        LegalFPImmediates.push_back(Imm);
@@ -865,9 +938,8 @@ namespace llvm {
  
      // Call lowering helpers.
  
-    /// IsEligibleForTailCallOptimization - Check whether the call is eligible
-    /// for tail call optimization. Targets which want to do tail call
-    /// optimization should implement this function.
+    /// Check whether the call is eligible for tail call optimization. Targets
+    /// that want to do tail call optimization should implement this function.
      bool IsEligibleForTailCallOptimization(SDValue Callee,
                                             CallingConv::ID CalleeCC,
                                             bool isVarArg,
@@ -928,12 +1000,14 @@ namespace llvm {
      SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerCATCHRET(SDValue Op, SelectionDAG &DAG) const;
      SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
      SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
  
      SDValue
        LowerFormalArguments(SDValue Chain,
@@ -966,9 +1040,13 @@ namespace llvm {
  
      bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
      bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    TargetLoweringBase::AtomicRMWExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+    LoadInst *
+    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
  
-    bool needsCmpXchgNb(const Type *MemType) const;
+    bool needsCmpXchgNb(Type *MemType) const;
  
      /// Utility function to emit atomic-load-arith operations (and, or, xor,
      /// nand, max, min, umax, umin). It takes the corresponding instruction to
@@ -995,6 +1073,9 @@ namespace llvm {
      MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                           MachineBasicBlock *BB) const;
  
+    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
+                                           MachineBasicBlock *BB) const;
+
      MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                                MachineBasicBlock *BB) const;
  
@@ -1028,6 +1109,18 @@ namespace llvm {
  
      /// Convert a comparison if required by the subtarget.
      SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+
+    /// Use rsqrt* to speed up sqrt calculations.
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps,
+                             bool &UseOneConstNR) const override;
+
+    /// Use rcp* to speed up fdiv calculations.
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
+
+    /// Reassociate floating point divisions into multiply by reciprocal.
+    unsigned combineRepeatedFPDivisors() const override;
    };
  
    namespace X86 {