[AVX512] Fix copy-and-paste bugs in vpermil

[oota-llvm.git] / lib / Target / X86 / X86ISelDAGToDAG.cpp
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index ad58a57499a5ea2379b5f8f5a4c1973eabf8c465..4386028e9f5810e2a7f5767e944155eecce5ea39 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -24,6 +24,7 @@
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Type.h"
@@ -33,6 +34,7 @@
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetOptions.h"
+#include <stdint.h>
  using namespace llvm;
  
  #define DEBUG_TYPE "x86-isel"
@@ -236,10 +238,10 @@ namespace {
      inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                     SDValue &Scale, SDValue &Index,
                                     SDValue &Disp, SDValue &Segment) {
-      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
-        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
-                                    getTargetLowering()->getPointerTy()) :
-        AM.Base_Reg;
+      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+                 ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
+                                               TLI->getPointerTy())
+                 : AM.Base_Reg;
        Scale = getI8Imm(AM.Scale);
        Index = AM.IndexReg;
        // These are 32-bit even in 64-bit mode since RIP relative offset
@@ -298,6 +300,13 @@ namespace {
      const X86InstrInfo *getInstrInfo() const {
        return getTargetMachine().getSubtargetImpl()->getInstrInfo();
      }
+
+    /// \brief Returns true to report that complex-pattern functions can
+    /// mutate the DAG: address-mode matching performs shift-of-and to
+    /// and-of-shift reassociation to expose more scaled addressing modes.
+    bool ComplexPatternFuncMutatesDAG() const override {
+      return true;
+    }
    };
  }
  
@@ -509,7 +518,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
      // If the source and destination are SSE registers, then this is a legal
      // conversion that should not be lowered.
      const X86TargetLowering *X86Lowering =
-        static_cast<const X86TargetLowering *>(getTargetLowering());
+        static_cast<const X86TargetLowering *>(TLI);
      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
      if (SrcIsSSE && DstIsSSE)
@@ -774,9 +783,10 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
    }
  }
  
-// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
-// allows us to convert the shift and and into an h-register extract and
-// a scaled index. Returns false if the simplification is performed.
+// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
+// safe. This allows us to convert the shift and the AND into an h-register
+// extract and a scaled index. Returns false if the simplification is
+// performed.
  static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
@@ -1562,8 +1572,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
  ///
  SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
    unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
-  return CurDAG->getRegister(GlobalBaseReg,
-                             getTargetLowering()->getPointerTy()).getNode();
+  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
  }
  
  /// Atomic opcode table
@@ -1697,16 +1706,23 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
  static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
                                                  SDLoc dl,
                                                  enum AtomicOpc &Op, MVT NVT,
-                                                SDValue Val) {
+                                                SDValue Val,
+                                                const X86Subtarget *Subtarget) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
      int64_t CNVal = CN->getSExtValue();
      // Quit if not 32-bit imm.
      if ((int32_t)CNVal != CNVal)
        return Val;
+    // Quit if INT32_MIN: it would be negated as it is negative and overflow,
+    // producing an immediate that does not fit in the 32 bits available for
+    // an immediate operand to sub. However, it still fits in 32 bits for the
+    // add (since it is not negated) so we can return target-constant.
+    if (CNVal == INT32_MIN)
+      return CurDAG->getTargetConstant(CNVal, NVT);
      // For atomic-load-add, we could do some optimizations.
      if (Op == ADD) {
        // Translate to INC/DEC if ADD by 1 or -1.
-      if ((CNVal == 1) || (CNVal == -1)) {
+      if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
          Op = (CNVal == 1) ? INC : DEC;
          // No more constant operand after being translated into INC/DEC.
          return SDValue();
@@ -1778,7 +1794,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
        break;
    }
  
-  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
    bool isUnOp = !Val.getNode();
    bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
  
@@ -1815,6 +1831,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
            Opc = AtomicOpcTbl[Op][SextConstantI64];
          else if (i64immSExt32(Val.getNode()))
            Opc = AtomicOpcTbl[Op][ConstantI64];
+        else
+          llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith");
        } else
          Opc = AtomicOpcTbl[Op][I64];
        break;
@@ -2200,6 +2218,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
      return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
                                  getI8Imm(ShlVal));
    }
+  case X86ISD::UMUL8:
+  case X86ISD::SMUL8: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    // SMUL8 selects IMUL8r; UMUL8 selects MUL8r.
+    Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
+    // Copy N0 into AL; result #1 of that copy is passed on as InFlag.
+    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
+                                          N0, SDValue()).getValue(1);
+    // The machine node takes N1 and InFlag and has result types (NVT, i32).
+    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
+    SDValue Ops[] = {N1, InFlag};
+    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+    // Redirect uses of both results of Node to CNode, then return nullptr.
+    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+    return nullptr;
+  }
+
    case X86ISD::UMUL: {
      SDValue N0 = Node->getOperand(0);
      SDValue N1 = Node->getOperand(1);