X86 CodeGenPrep: sink shufflevectors before shifts

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.h
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 89ab4280357baecf5e63d9437b5c349752750e3f..ce9594ae3ed657c6d9d79fb501b10a876f6896e5 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -94,6 +94,9 @@ namespace llvm {
        /// operand, usually produced by a CMP instruction.
        SETCC,
  
+      /// X86 Select
+      SELECT,
+
        // Same as SETCC except it's materialized with a sbb and the value is all
        // one's or all zero's.
        SETCC_CARRY,  // R = carry_bit ? ~0 : 0
@@ -101,7 +104,7 @@ namespace llvm {
        /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
        /// Operands are two FP values to compare; result is a mask of
        /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
-      FSETCCss, FSETCCsd,
+      FSETCC,
  
        /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
        /// result in an integer GPR.  Needs masking for scalar result.
@@ -242,18 +245,21 @@ namespace llvm {
        /// the list of operands.
        TC_RETURN,
  
-      // VZEXT_MOVL - Vector move low and zero extend.
+      // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
        VZEXT_MOVL,
  
-      // VSEXT_MOVL - Vector move low and sign extend.
-      VSEXT_MOVL,
-
        // VZEXT - Vector integer zero-extend.
        VZEXT,
  
        // VSEXT - Vector integer signed-extend.
        VSEXT,
  
+      // VTRUNC - Vector integer truncate.
+      VTRUNC,
+
+      // VTRUNCM - Vector integer truncate with mask.
+      VTRUNCM,
+
        // VFPEXT - Vector FP extend.
        VFPEXT,
  
@@ -274,7 +280,7 @@ namespace llvm {
  
        // PCMP* - Vector integer comparisons.
        PCMPEQ, PCMPGT,
-      // PCMP*M - Vector integer comparisons, the result is in a mask vector
+      // PCMP*M - Vector integer comparisons, the result is in a mask vector.
        PCMPEQM, PCMPGTM,
  
        /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
@@ -286,24 +292,26 @@ namespace llvm {
        ADD, SUB, ADC, SBB, SMUL,
        INC, DEC, OR, XOR, AND,
  
-      BLSI,   // BLSI - Extract lowest set isolated bit
-      BLSMSK, // BLSMSK - Get mask up to lowest set bit
-      BLSR,   // BLSR - Reset lowest set bit
+      BZHI,   // BZHI - Zero high bits
+      BEXTR,  // BEXTR - Bit field extract
  
        UMUL, // LOW, HI, FLAGS = umul LHS, RHS
  
        // MUL_IMM - X86 specific multiply by immediate.
        MUL_IMM,
  
-      // PTEST - Vector bitwise comparisons
+      // PTEST - Vector bitwise comparisons.
        PTEST,
  
-      // TESTP - Vector packed fp sign bitwise comparisons
+      // TESTP - Vector packed fp sign bitwise comparisons.
        TESTP,
  
+      // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
+      TESTM,
+      TESTNM,
+
        // OR/AND test for masks
        KORTEST,
-      KTEST,
  
        // Several flavors of instructions with vector shuffle behaviors.
        PALIGNR,
@@ -326,11 +334,15 @@ namespace llvm {
        VPERMILP,
        VPERMV,
        VPERMV3,
+      VPERMIV3,
        VPERMI,
        VPERM2X128,
        VBROADCAST,
        // masked broadcast
        VBROADCASTM,
+      // Insert/Extract vector element
+      VINSERT,
+      VEXTRACT,
  
        // PMULUDQ - Vector multiply packed unsigned doubleword integers
        PMULUDQ,
@@ -564,7 +576,8 @@ namespace llvm {
      /// allowsUnalignedMemoryAccesses - Returns true if the target allows
      /// unaligned memory accesses. of the specified type. Returns whether it
      /// is "fast" by reference in the second argument.
-    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
+                                               bool *Fast) const;
  
      /// LowerOperation - Provide custom lowering hooks for some operations.
      ///
@@ -666,6 +679,9 @@ namespace llvm {
      /// the immediate into a register.
      virtual bool isLegalAddImmediate(int64_t Imm) const;
  
+
+    virtual bool isVectorShiftByScalarCheap(Type *Ty) const;
+
      /// isTruncateFree - Return true if it's free to truncate a value of
      /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
      /// register EAX to i16 by referencing its sub-register AX.
@@ -749,6 +765,11 @@ namespace llvm {
        return isTargetFTOL() && VT == MVT::i64;
      }
  
+    /// \brief Returns true if it is beneficial to convert a load of a constant
+    /// to just the constant itself.
+    virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+                                                   Type *Ty) const;
+
      /// createFastISel - This method returns a target specific FastISel object,
      /// or null if the target does not support "fast" ISel.
      virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -763,6 +784,8 @@ namespace llvm {
      SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                        SelectionDAG &DAG) const;
  
+    virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const LLVM_OVERRIDE;
+
      /// \brief Reset the operation actions based on target options.
      virtual void resetOperationActions();
  
@@ -839,8 +862,6 @@ namespace llvm {
                                                 bool isSigned,
                                                 bool isReplace) const;
  
-    SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
-                                   SelectionDAG &DAG) const;
      SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
@@ -853,24 +874,14 @@ namespace llvm {
      SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
      SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
      SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                        SDLoc dl, SelectionDAG &DAG) const;
      SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -889,10 +900,7 @@ namespace llvm {
      SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  
      virtual SDValue
        LowerFormalArguments(SDValue Chain,
@@ -924,6 +932,8 @@ namespace llvm {
                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                     LLVMContext &Context) const;
  
+    virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const;
+
      /// Utility function to emit atomic-load-arith operations (and, or, xor,
      /// nand, max, min, umax, umin). It takes the corresponding instruction to
      /// expand, the associated machine basic block, and the associated X86
@@ -968,6 +978,9 @@ namespace llvm {
      MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
                                           MachineBasicBlock *MBB) const;
  
+    MachineBasicBlock *emitFMA3Instr(MachineInstr *MI,
+                                     MachineBasicBlock *MBB) const;
+
      /// Emit nodes that will be selected as "test Op0,Op0", or something
      /// equivalent, for use with the given x86 condition code.
      SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;