When both x/y and x%y are needed (x and y both scalar integer), compute both results with a single div or idiv instruction.

author Dan Gohman <gohman@apple.com>

Tue, 25 Sep 2007 18:23:27 +0000 (18:23 +0000)

committer Dan Gohman <gohman@apple.com>

Tue, 25 Sep 2007 18:23:27 +0000 (18:23 +0000)
author Dan Gohman <gohman@apple.com>
Tue, 25 Sep 2007 18:23:27 +0000 (18:23 +0000)
committer Dan Gohman <gohman@apple.com>
Tue, 25 Sep 2007 18:23:27 +0000 (18:23 +0000)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index b33ced8ba663a88bc2a896958a4c1f0d8f843a2b..117d2734c547198a3495226e703e6e07e3cce251 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1162,12 +1162,9 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
        return NULL;
      }
        
-    case ISD::SDIV:
-    case ISD::UDIV:
-    case ISD::SREM:
-    case ISD::UREM: {
-      bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
-      bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
+    case X86ISD::DIV:
+    case X86ISD::IDIV: {
+      bool isSigned = Opcode == X86ISD::IDIV;
        if (!isSigned)
          switch (NVT) {
          default: assert(0 && "Unsupported VT!");
@@ -1275,31 +1272,49 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
            SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
        }
  
-      unsigned Reg = isDiv ? LoReg : HiReg;
-      SDOperand Result;
-      if (Reg == X86::AH && Subtarget->is64Bit()) {
-        // Prevent use of AH in a REX instruction by referencing AX instead.
-        // Shift it down 8 bits.
-        Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
-        Chain = Result.getValue(1);
-        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
-                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
-        // Then truncate it down to i8.
-        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
-        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
-                                                 MVT::i8, Result, SRIdx), 0);
-      } else {
-        Result = CurDAG->getCopyFromReg(Chain, Reg, NVT, InFlag);
+      // Copy the division (low) result, if it is needed.
+      if (!N.getValue(0).use_empty()) {
+        SDOperand Result = CurDAG->getCopyFromReg(Chain, LoReg, NVT, InFlag);
          Chain = Result.getValue(1);
+        InFlag = Result.getValue(2);
+        ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+        DOUT << std::string(Indent-2, ' ') << "=> ";
+        DEBUG(Result.Val->dump(CurDAG));
+        DOUT << "\n";
+#endif
+      }
+      // Copy the remainder (high) result, if it is needed.
+      if (!N.getValue(1).use_empty()) {
+        SDOperand Result;
+        if (HiReg == X86::AH && Subtarget->is64Bit()) {
+          // Prevent use of AH in a REX instruction by referencing AX instead.
+          // Shift it down 8 bits.
+          Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
+          Chain = Result.getValue(1);
+          InFlag = Result.getValue(2);
+          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
+                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
+          // Then truncate it down to i8.
+          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+                                                   MVT::i8, Result, SRIdx), 0);
+        } else {
+          Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
+          Chain = Result.getValue(1);
+          InFlag = Result.getValue(2);
+        }
+        ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+        DOUT << std::string(Indent-2, ' ') << "=> ";
+        DEBUG(Result.Val->dump(CurDAG));
+        DOUT << "\n";
+#endif
        }
-      ReplaceUses(N.getValue(0), Result);
        if (foldedLoad)
          ReplaceUses(N1.getValue(1), Chain);
  
  #ifndef NDEBUG
-      DOUT << std::string(Indent-2, ' ') << "=> ";
-      DEBUG(Result.Val->dump(CurDAG));
-      DOUT << "\n";
        Indent -= 2;
  #endif
  
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a67e77f2ce692a28dcdacf6fb54e3d5e93927fc5..1a8089688bf6f925755078d394b08a880410a93c 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -155,6 +155,27 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
      setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
    }
  
+  // Divide and remainder are lowered to use div or idiv in legalize in
+  // order to expose the intermediate computations to trivial CSE. This is
+  // most noticeable when both x/y and x%y are being computed; they can be
+  // done with a single div or idiv.
+  setOperationAction(ISD::SDIV            , MVT::i8    , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i8    , Custom);
+  setOperationAction(ISD::SREM            , MVT::i8    , Custom);
+  setOperationAction(ISD::UREM            , MVT::i8    , Custom);
+  setOperationAction(ISD::SDIV            , MVT::i16   , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i16   , Custom);
+  setOperationAction(ISD::SREM            , MVT::i16   , Custom);
+  setOperationAction(ISD::UREM            , MVT::i16   , Custom);
+  setOperationAction(ISD::SDIV            , MVT::i32   , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i32   , Custom);
+  setOperationAction(ISD::SREM            , MVT::i32   , Custom);
+  setOperationAction(ISD::UREM            , MVT::i32   , Custom);
+  setOperationAction(ISD::SDIV            , MVT::i64   , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i64   , Custom);
+  setOperationAction(ISD::SREM            , MVT::i64   , Custom);
+  setOperationAction(ISD::UREM            , MVT::i64   , Custom);
+
    setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
    setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
    setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
@@ -3393,6 +3414,22 @@ SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
      return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
  }
  
+SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) {
+  unsigned Opcode = Op.getOpcode();
+  MVT::ValueType NVT = Op.getValueType();
+  bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
+  bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
+  unsigned Opc = isSigned ? X86ISD::IDIV : X86ISD::DIV;
+
+  SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+  SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2);
+
+  if (isDiv)
+    return DR;
+
+  return SDOperand(DR.Val, 1);
+}
+
  SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
    assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
           Op.getOperand(0).getValueType() >= MVT::i16 &&
@@ -4668,6 +4705,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
    case ISD::SHL_PARTS:
    case ISD::SRA_PARTS:
    case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:               return LowerIntegerDivOrRem(Op, DAG);
    case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
    case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
    case ISD::FABS:               return LowerFABS(Op, DAG);
@@ -4751,6 +4792,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
    case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
    case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
+  case X86ISD::DIV:                return "X86ISD::DIV";
+  case X86ISD::IDIV:               return "X86ISD::IDIV";
    }
  }
  
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 04279e835cbb8fe0348afa97cccfc608ab386e0d..10172d95a93df4456253b08d3e0f573b00a41318 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -181,6 +181,10 @@ namespace llvm {
        /// in order to obtain suitable precision.
        FRSQRT, FRCP,
  
+      /// DIV, IDIV - Unsigned and signed integer division and remainder.
+      /// Result 0 is the quotient and result 1 is the remainder.
+      DIV, IDIV,
+
        // Thread Local Storage
        TLSADDR, THREAD_POINTER,
  
@@ -420,6 +424,7 @@ namespace llvm {
      SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG);
      SDOperand LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG);
      SDOperand LowerShift(SDOperand Op, SelectionDAG &DAG);
+    SDOperand LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG);
      SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG);
      SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG);
      SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG);
diff --git a/test/CodeGen/X86/divrem.ll b/test/CodeGen/X86/divrem.ll

new file mode 100644 (file)

index 0000000..a611edd
--- /dev/null
+++ b/test/CodeGen/X86/divrem.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8
+
+define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) {
+       %r = sdiv i64 %x, %y
+       %t = srem i64 %x, %y
+       store i64 %r, i64* %p
+       store i64 %t, i64* %q
+       ret void
+}
+define void @si32(i32 %x, i32 %y, i32* %p, i32* %q) {
+       %r = sdiv i32 %x, %y
+       %t = srem i32 %x, %y
+       store i32 %r, i32* %p
+       store i32 %t, i32* %q
+       ret void
+}
+define void @si16(i16 %x, i16 %y, i16* %p, i16* %q) {
+       %r = sdiv i16 %x, %y
+       %t = srem i16 %x, %y
+       store i16 %r, i16* %p
+       store i16 %t, i16* %q
+       ret void
+}
+define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) {
+       %r = sdiv i8 %x, %y
+       %t = srem i8 %x, %y
+       store i8 %r, i8* %p
+       store i8 %t, i8* %q
+       ret void
+}
+define void @ui64(i64 %x, i64 %y, i64* %p, i64* %q) {
+       %r = udiv i64 %x, %y
+       %t = urem i64 %x, %y
+       store i64 %r, i64* %p
+       store i64 %t, i64* %q
+       ret void
+}
+define void @ui32(i32 %x, i32 %y, i32* %p, i32* %q) {
+       %r = udiv i32 %x, %y
+       %t = urem i32 %x, %y
+       store i32 %r, i32* %p
+       store i32 %t, i32* %q
+       ret void
+}
+define void @ui16(i16 %x, i16 %y, i16* %p, i16* %q) {
+       %r = udiv i16 %x, %y
+       %t = urem i16 %x, %y
+       store i16 %r, i16* %p
+       store i16 %t, i16* %q
+       ret void
+}
+define void @ui8(i8 %x, i8 %y, i8* %p, i8* %q) {
+       %r = udiv i8 %x, %y
+       %t = urem i8 %x, %y
+       store i8 %r, i8* %p
+       store i8 %t, i8* %q
+       ret void
+}
author	Dan Gohman <gohman@apple.com>
	Tue, 25 Sep 2007 18:23:27 +0000 (18:23 +0000)
committer	Dan Gohman <gohman@apple.com>
	Tue, 25 Sep 2007 18:23:27 +0000 (18:23 +0000)
lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/divrem.ll	[new file with mode: 0644]	patch \| blob