Simplified BLEND pattern matching for shuffles.

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.h
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index bd04de150d912beed661a00b3163460a70d52562..e830c5fef62a9528ce4196bdaa60874309c79f1a 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -15,14 +15,15 @@
  #ifndef X86ISELLOWERING_H
  #define X86ISELLOWERING_H
  
-#include "X86Subtarget.h"
-#include "X86RegisterInfo.h"
  #include "X86MachineFunctionInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/FastISel.h"
  #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetTransformImpl.h"
  
  namespace llvm {
    namespace X86ISD {
@@ -137,15 +138,15 @@ namespace llvm {
        /// relative displacements.
        WrapperRIP,
  
-      /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
-      /// of an XMM vector, with the high word zero filled.
-      MOVQ2DQ,
-
        /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
        /// to an MMX vector.  If you think this is too close to the previous
        /// mnemonic, so do I; blame Intel.
        MOVDQ2Q,
  
+      /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+      /// vector to a GPR.
+      MMX_MOVD2W,
+
        /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
        /// i32, corresponds to X86::PEXTRB.
        PEXTRB,
@@ -172,16 +173,34 @@ namespace llvm {
        /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
        ANDNP,
  
-      /// PSIGNB/W/D - Copy integer sign.
-      PSIGNB, PSIGNW, PSIGND,
+      /// PSIGN - Copy integer sign.
+      PSIGN,
  
-      /// BLEND family of opcodes
+      /// BLENDV - Blend where the selector is a register.
        BLENDV,
  
+      /// BLENDI - Blend where the selector is an immediate.
+      BLENDI,
+
+      /// HADD - Integer horizontal add.
+      HADD,
+
+      /// HSUB - Integer horizontal sub.
+      HSUB,
+
+      /// FHADD - Floating point horizontal add.
+      FHADD,
+
+      /// FHSUB - Floating point horizontal sub.
+      FHSUB,
+
        /// FMAX, FMIN - Floating point max and min.
        ///
        FMAX, FMIN,
  
+      /// FMAXC, FMINC - Commutative FMIN and FMAX.
+      FMAXC, FMINC,
+
        /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
        /// approximation.  Note that these typically require refinement
        /// in order to obtain suitable precision.
@@ -190,6 +209,10 @@ namespace llvm {
        // TLSADDR - Thread Local Storage.
        TLSADDR,
  
+      // TLSBASEADDR - Thread Local Storage. A call to get the start address
+      // of the TLS block for the current module.
+      TLSBASEADDR,
+
        // TLSCALL - Thread Local Storage.  When calling to an OS provided
        // thunk at the address from an earlier relocation.
        TLSCALL,
@@ -197,6 +220,12 @@ namespace llvm {
        // EH_RETURN - Exception Handling helpers.
        EH_RETURN,
  
+      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+      EH_SJLJ_SETJMP,
+
+      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+      EH_SJLJ_LONGJMP,
+
        /// TC_RETURN - Tail call return.
        ///   operand #0 chain
        ///   operand #1 callee (register or absolute)
@@ -207,21 +236,46 @@ namespace llvm {
        // VZEXT_MOVL - Vector move low and zero extend.
        VZEXT_MOVL,
  
-      // VSHL, VSRL - Vector logical left / right shift.
-      VSHL, VSRL,
+      // VSEXT_MOVL - Vector move low and sign extend.
+      VSEXT_MOVL,
+
+      // VZEXT - Vector integer zero-extend.
+      VZEXT,
+
+      // VSEXT - Vector integer signed-extend.
+      VSEXT,
+
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
+      // VFPROUND - Vector FP round.
+      VFPROUND,
+
+      // VSHL, VSRL - 128-bit vector logical left / right shift
+      VSHLDQ, VSRLDQ,
  
-      // CMPPD, CMPPS - Vector double/float comparison.
-      // CMPPD, CMPPS - Vector double/float comparison.
-      CMPPD, CMPPS,
+      // VSHL, VSRL, VSRA - Vector shift elements
+      VSHL, VSRL, VSRA,
+
+      // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+      VSHLI, VSRLI, VSRAI,
+
+      // CMPP - Vector packed double/float comparison.
+      CMPP,
  
        // PCMP* - Vector integer comparisons.
-      PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
-      PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+      PCMPEQ, PCMPGT,
  
        // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
        ADD, SUB, ADC, SBB, SMUL,
        INC, DEC, OR, XOR, AND,
  
+      ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
+
+      BLSI,   // BLSI - Extract lowest set isolated bit
+      BLSMSK, // BLSMSK - Get mask up to lowest set bit
+      BLSR,   // BLSR - Reset lowest set bit
+
        UMUL, // LOW, HI, FLAGS = umul LHS, RHS
  
        // MUL_IMM - X86 specific multiply by immediate.
@@ -238,46 +292,36 @@ namespace llvm {
        PSHUFD,
        PSHUFHW,
        PSHUFLW,
-      PSHUFHW_LD,
-      PSHUFLW_LD,
-      SHUFPD,
-      SHUFPS,
+      SHUFP,
        MOVDDUP,
        MOVSHDUP,
        MOVSLDUP,
-      MOVSHDUP_LD,
-      MOVSLDUP_LD,
        MOVLHPS,
        MOVLHPD,
        MOVHLPS,
-      MOVHLPD,
        MOVLPS,
        MOVLPD,
        MOVSD,
        MOVSS,
-      UNPCKLPS,
-      UNPCKLPD,
-      VUNPCKLPSY,
-      VUNPCKLPDY,
-      UNPCKHPS,
-      UNPCKHPD,
-      VUNPCKHPSY,
-      VUNPCKHPDY,
-      PUNPCKLBW,
-      PUNPCKLWD,
-      PUNPCKLDQ,
-      PUNPCKLQDQ,
-      PUNPCKHBW,
-      PUNPCKHWD,
-      PUNPCKHDQ,
-      PUNPCKHQDQ,
-      VPERMILPS,
-      VPERMILPSY,
-      VPERMILPD,
-      VPERMILPDY,
-      VPERM2F128,
+      UNPCKL,
+      UNPCKH,
+      VPERMILP,
+      VPERMV,
+      VPERMI,
+      VPERM2X128,
        VBROADCAST,
  
+      // PMULUDQ - Vector multiply packed unsigned doubleword integers
+      PMULUDQ,
+
+      // FMA nodes
+      FMADD,
+      FNMADD,
+      FMSUB,
+      FNMSUB,
+      FMADDSUB,
+      FMSUBADD,
+
        // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
        // according to %al. An operator is needed so that this can be expanded
        // with control flow.
@@ -291,12 +335,28 @@ namespace llvm {
        // falls back to heap allocation if not.
        SEG_ALLOCA,
  
+      // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
+      WIN_FTOL,
+
        // Memory barrier
        MEMBARRIER,
        MFENCE,
        SFENCE,
        LFENCE,
  
+      // FNSTSW16r - Store FP status word into i16 register.
+      FNSTSW16r,
+
+      // SAHF - Store contents of %ah into %eflags.
+      SAHF,
+
+      // RDRAND - Get a random integer and indicate whether it is valid in CF.
+      RDRAND,
+
+      // PCMP*STRI
+      PCMPISTRI,
+      PCMPESTRI,
+
        // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
        // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
        // Atomic 64-bit binary operations.
@@ -306,6 +366,10 @@ namespace llvm {
        ATOMXOR64_DAG,
        ATOMAND64_DAG,
        ATOMNAND64_DAG,
+      ATOMMAX64_DAG,
+      ATOMMIN64_DAG,
+      ATOMUMAX64_DAG,
+      ATOMUMIN64_DAG,
        ATOMSWAP64_DAG,
  
        // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
@@ -360,75 +424,6 @@ namespace llvm {
  
    /// Define some predicates that are used for node matching.
    namespace X86 {
-    /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to PSHUFD.
-    bool isPSHUFDMask(ShuffleVectorSDNode *N);
-
-    /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to PSHUFD.
-    bool isPSHUFHWMask(ShuffleVectorSDNode *N);
-
-    /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to PSHUFD.
-    bool isPSHUFLWMask(ShuffleVectorSDNode *N);
-
-    /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to SHUFP*.
-    bool isSHUFPMask(ShuffleVectorSDNode *N);
-
-    /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-    bool isMOVHLPSMask(ShuffleVectorSDNode *N);
-
-    /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-    /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-    /// <2, 3, 2, 3>
-    bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
-
-    /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
-    bool isMOVLPMask(ShuffleVectorSDNode *N);
-
-    /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
-    /// as well as MOVLHPS.
-    bool isMOVLHPSMask(ShuffleVectorSDNode *N);
-
-    /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to UNPCKL.
-    bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
-
-    /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to UNPCKH.
-    bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
-
-    /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
-    /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
-    /// <0, 0, 1, 1>
-    bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
-
-    /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
-    /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
-    /// <2, 2, 3, 3>
-    bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
-
-    /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to MOVSS,
-    /// MOVSD, and MOVD, i.e. setting the lowest element.
-    bool isMOVLMask(ShuffleVectorSDNode *N);
-
-    /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
-    bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
-
-    /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
-    bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
-
-    /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
-    bool isMOVDDUPMask(ShuffleVectorSDNode *N);
-
      /// isVEXTRACTF128Index - Return true if the specified
      /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
      /// suitable for input to VEXTRACTF128.
@@ -439,23 +434,6 @@ namespace llvm {
      /// suitable for input to VINSERTF128.
      bool isVINSERTF128Index(SDNode *N);
  
-    /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
-    /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
-    /// instructions.
-    unsigned getShuffleSHUFImmediate(SDNode *N);
-
-    /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
-    /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
-    unsigned getShufflePSHUFHWImmediate(SDNode *N);
-
-    /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
-    /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
-    unsigned getShufflePSHUFLWImmediate(SDNode *N);
-
-    /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
-    /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-    unsigned getShufflePALIGNRImmediate(SDNode *N);
-
      /// getExtractVEXTRACTF128Immediate - Return the appropriate
      /// immediate to extract the specified EXTRACT_SUBVECTOR index
      /// with VEXTRACTF128 instructions.
@@ -505,10 +483,6 @@ namespace llvm {
      getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                   unsigned JTI, MCContext &Ctx) const;
  
-    /// getStackPtrReg - Return the stack pointer register we are using: either
-    /// ESP or RSP.
-    unsigned getStackPtrReg() const { return X86StackPtr; }
-
      /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
      /// function arguments in the caller parameter area. For X86, aggregates
      /// that contains are placed at 16-byte boundaries while the rest are at
@@ -521,7 +495,7 @@ namespace llvm {
      /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
      /// means there isn't a need to check it against alignment requirement,
      /// probably because the source does not need to be loaded. If
-    /// 'NonScalarIntSafe' is true, that means it's safe to return a
+    /// 'IsZeroVal' is true, that means it's safe to return a
      /// non-scalar-integer type, e.g. empty string source, constant, or loaded
      /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
      /// constant so it does not need to be loaded.
@@ -529,7 +503,7 @@ namespace llvm {
      /// target-independent logic.
      virtual EVT
      getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                        bool NonScalarIntSafe, bool MemcpyStrSrc,
+                        bool IsZeroVal, bool MemcpyStrSrc,
                          MachineFunction &MF) const;
  
      /// allowsUnalignedMemoryAccesses - Returns true if the target allows
@@ -579,7 +553,6 @@ namespace llvm {
      /// in Mask are known to be either zero or one and return them in the
      /// KnownZero/KnownOne bitsets.
      virtual void computeMaskedBitsForTargetNode(const SDValue Op,
-                                                const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
@@ -627,6 +600,18 @@ namespace llvm {
      /// by AM is legal for this target, for a load/store of the specified type.
      virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
  
+    /// isLegalICmpImmediate - Return true if the specified immediate is legal
+    /// icmp immediate, that is the target has icmp instructions which can
+    /// compare a register against the immediate without having to materialize
+    /// the immediate into a register.
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+    /// isLegalAddImmediate - Return true if the specified immediate is legal
+    /// add immediate, that is the target has add instructions which can
+    /// add a register and the immediate without having to materialize
+    /// the immediate into a register.
+    virtual bool isLegalAddImmediate(int64_t Imm) const;
+
      /// isTruncateFree - Return true if it's free to truncate a value of
      /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
      /// register EAX to i16 by referencing its sub-register AX.
@@ -644,6 +629,12 @@ namespace llvm {
      virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
      virtual bool isZExtFree(EVT VT1, EVT VT2) const;
  
+    /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+    /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+    /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+    /// is expanded to mul + add.
+    virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
      /// isNarrowingProfitable - Return true if it's profitable to narrow
      /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
      /// from i32 to i8 but not from i32 to i16.
@@ -689,9 +680,22 @@ namespace llvm {
        (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
      }
  
+    /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
+    /// for fptoui.
+    bool isTargetFTOL() const {
+      return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
+    }
+
+    /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
+    /// used for fptoui to the given type.
+    bool isIntegerTypeFTOL(EVT VT) const {
+      return isTargetFTOL() && VT == MVT::i64;
+    }
+
      /// createFastISel - This method returns a target specific FastISel object,
      /// or null if the target does not support "fast" ISel.
-    virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+    virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                                     const TargetLibraryInfo *libInfo) const;
  
      /// getStackCookieLocation - Return true if the target stores stack
      /// protector cookies at a fixed offset in some non-standard address
@@ -711,10 +715,7 @@ namespace llvm {
      /// make the right decision when generating code for different targets.
      const X86Subtarget *Subtarget;
      const X86RegisterInfo *RegInfo;
-    const TargetData *TD;
-
-    /// X86StackPtr - X86 physical register used as stack ptr.
-    unsigned X86StackPtr;
+    const DataLayout *TD;
  
      /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
      /// floating point ops.
@@ -758,6 +759,7 @@ namespace llvm {
                                             bool isVarArg,
                                             bool isCalleeStructRet,
                                             bool isCallerStructRet,
+                                           Type *RetTy,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -771,20 +773,17 @@ namespace llvm {
                                           SelectionDAG &DAG) const;
  
      std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
-                                               bool isSigned) const;
+                                               bool isSigned,
+                                               bool isReplace) const;
  
      SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                     SelectionDAG &DAG) const;
      SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -798,50 +797,46 @@ namespace llvm {
      SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                        DebugLoc dl, SelectionDAG &DAG) const;
      SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
  
-    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
  
-    // Utility functions to help LowerVECTOR_SHUFFLE
-    SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+    // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
+    SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
+    SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
+    SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
+
+    SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
+
+    SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
  
      virtual SDValue
        LowerFormalArguments(SDValue Chain,
@@ -850,12 +845,7 @@ namespace llvm {
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const;
      virtual SDValue
-      LowerCall(SDValue Chain, SDValue Callee,
-                CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
-                const SmallVectorImpl<ISD::OutputArg> &Outs,
-                const SmallVectorImpl<SDValue> &OutVals,
-                const SmallVectorImpl<ISD::InputArg> &Ins,
-                DebugLoc dl, SelectionDAG &DAG,
+      LowerCall(CallLoweringInfo &CLI,
                  SmallVectorImpl<SDValue> &InVals) const;
  
      virtual SDValue
@@ -865,7 +855,7 @@ namespace llvm {
                    const SmallVectorImpl<SDValue> &OutVals,
                    DebugLoc dl, SelectionDAG &DAG) const;
  
-    virtual bool isUsedByReturnOnly(SDNode *N) const;
+    virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
  
      virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
  
@@ -875,58 +865,21 @@ namespace llvm {
  
      virtual bool
      CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
-                  bool isVarArg,
-                  const SmallVectorImpl<ISD::OutputArg> &Outs,
-                  LLVMContext &Context) const;
-
-    void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                 SelectionDAG &DAG, unsigned NewOp) const;
-
-    /// Utility function to emit string processing sse4.2 instructions
-    /// that return in xmm0.
-    /// This takes the instruction to expand, the associated machine basic
-    /// block, the number of args, and whether or not the second arg is
-    /// in memory or not.
-    MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
-                                unsigned argNum, bool inMem) const;
-
-    /// Utility functions to emit monitor and mwait instructions. These
-    /// need to make sure that the arguments to the intrinsic are in the
-    /// correct registers.
-    MachineBasicBlock *EmitMonitor(MachineInstr *MI,
-                                   MachineBasicBlock *BB) const;
-    MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
-
-    /// Utility function to emit atomic bitwise operations (and, or, xor).
-    /// It takes the bitwise instruction to expand, the associated machine basic
-    /// block, and the associated X86 opcodes for reg/reg and reg/imm.
-    MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
-                                                    MachineInstr *BInstr,
-                                                    MachineBasicBlock *BB,
-                                                    unsigned regOpc,
-                                                    unsigned immOpc,
-                                                    unsigned loadOpc,
-                                                    unsigned cxchgOpc,
-                                                    unsigned notOpc,
-                                                    unsigned EAXreg,
-                                                    TargetRegisterClass *RC,
-                                                    bool invSrc = false) const;
-
-    MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
-                                                    MachineInstr *BInstr,
-                                                    MachineBasicBlock *BB,
-                                                    unsigned regOpcL,
-                                                    unsigned regOpcH,
-                                                    unsigned immOpcL,
-                                                    unsigned immOpcH,
-                                                    bool invSrc = false) const;
-
-    /// Utility function to emit atomic min and max.  It takes the min/max
-    /// instruction to expand, the associated basic block, and the associated
-    /// cmov opcode for moving the min or max value.
-    MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
-                                                          MachineBasicBlock *BB,
-                                                        unsigned cmovOpc) const;
+                   bool isVarArg,
+                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                   LLVMContext &Context) const;
+
+    /// Utility function to emit atomic-load-arith operations (and, or, xor,
+    /// nand, max, min, umax, umin). It takes the corresponding instruction to
+    /// expand, the associated machine basic block, and the associated X86
+    /// opcodes for reg/reg.
+    MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) const;
+
+    /// Utility function to emit atomic-load-arith operations (and, or, xor,
+    /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
+    MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
+                                               MachineBasicBlock *MBB) const;
  
      // Utility function to emit the low-level va_arg code for X86-64.
      MachineBasicBlock *EmitVAARG64WithCustomInserter(
@@ -954,6 +907,12 @@ namespace llvm {
      MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;
  
+    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+                                        MachineBasicBlock *MBB) const;
+
+    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+                                         MachineBasicBlock *MBB) const;
+
      /// Emit nodes that will be selected as "test Op0,Op0", or something
      /// equivalent, for use with the given x86 condition code.
      SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
@@ -962,11 +921,40 @@ namespace llvm {
      /// equivalent, for use with the given x86 condition code.
      SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                      SelectionDAG &DAG) const;
+
+    /// Convert a comparison if required by the subtarget.
+    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
    };
  
    namespace X86 {
-    FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                             const TargetLibraryInfo *libInfo);
    }
+
+  class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl {
+  public:
+    explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) :
+      ScalarTargetTransformImpl(TL) {};
+
+    virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
+  };
+
+  class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
+  public:
+    explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
+    VectorTargetTransformImpl(TL) {}
+
+    virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+
+    virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                        unsigned Index) const;
+
+    unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                Type *CondTy) const;
+
+    virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                                      Type *Src) const;
+  };
  }
  
  #endif    // X86ISELLOWERING_H