Improve ISel across lane float min/max reduction

author Jun Bum Lim <junbuml@codeaurora.org>

Fri, 9 Oct 2015 14:11:25 +0000 (14:11 +0000)

committer Jun Bum Lim <junbuml@codeaurora.org>

Fri, 9 Oct 2015 14:11:25 +0000 (14:11 +0000)
author Jun Bum Lim <junbuml@codeaurora.org>
Fri, 9 Oct 2015 14:11:25 +0000 (14:11 +0000)
committer Jun Bum Lim <junbuml@codeaurora.org>
Fri, 9 Oct 2015 14:11:25 +0000 (14:11 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index fa8cad827955eee07db34a9ff5dfa6bbdae8dea9..a599997f87a30d3c4c5bfca18d8745e8bac90723 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8750,8 +8750,13 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
      return SDValue();
  
    int NumVecElts = VTy.getVectorNumElements();
-  if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
-    return SDValue();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (NumVecElts != 4)
+      return SDValue();
+  } else {
+    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+      return SDValue();
+  }
  
    int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
    SDValue PreOp = OpV;
@@ -8802,6 +8807,8 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
      PreOp = CurOp;
    }
    unsigned Opcode;
+  bool IsIntrinsic = false;
+
    switch (Op) {
    default:
      llvm_unreachable("Unexpected operator for across vector reduction");
@@ -8820,11 +8827,24 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
    case ISD::UMIN:
      Opcode = AArch64ISD::UMINV;
      break;
+  case ISD::FMAXNUM:
+    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+    IsIntrinsic = true;
+    break;
+  case ISD::FMINNUM:
+    Opcode = Intrinsic::aarch64_neon_fminnmv;
+    IsIntrinsic = true;
+    break;
    }
    SDLoc DL(N);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
-                     DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
-                     DAG.getConstant(0, DL, MVT::i64));
+
+  return IsIntrinsic
+             ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+                           DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
+             : DAG.getNode(
+                   ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
+                   DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
+                   DAG.getConstant(0, DL, MVT::i64));
  }
  
  /// Target-specific DAG combine for the across vector min/max reductions.
@@ -8848,9 +8868,6 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
  ///     becomes :
  ///   %1 = smaxv %0
  ///   %result = extract_vector_elt %1, 0
-/// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
-/// We could also support other types of across lane reduction available
-/// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
  static SDValue
  performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
                                          const AArch64Subtarget *Subtarget) {
@@ -8878,17 +8895,26 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
    SDValue VectorOp = SetCC.getOperand(0);
    unsigned Op = VectorOp->getOpcode();
    // Check if the input vector is fed by the operator we want to handle.
-  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
+  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+      Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
      return SDValue();
  
    EVT VTy = VectorOp.getValueType();
    if (!VTy.isVector())
      return SDValue();
  
-  EVT EltTy = VTy.getVectorElementType();
-  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+  if (VTy.getSizeInBits() < 64)
      return SDValue();
  
+  EVT EltTy = VTy.getVectorElementType();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (EltTy != MVT::f32)
+      return SDValue();
+  } else {
+    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+      return SDValue();
+  }
+
    // Check if extracting from the same vector.
    // For example,
    //   %sc = setcc %vector, %svn1, gt
@@ -8904,7 +8930,13 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
    if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
        (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
        (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
-      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
+      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+      (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+       CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+       CC != ISD::SETGE) ||
+      (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+       CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+       CC != ISD::SETLE))
      return SDValue();
  
    // Expect to check only lane 0 from the vector SETCC.
@@ -8963,6 +8995,9 @@ performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
      return SDValue();
  
+  if (VTy.getSizeInBits() < 64)
+    return SDValue();
+
    return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
  }
  
diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll

index 42c76e443e5a3b1f7422c32a439c498804ef8246..385427f2026e8ccaf680b54cab517f415f299a52 100644 (file)
--- a/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -285,3 +285,35 @@ define i64 @umin_D(<2 x i64>* nocapture readonly %arr)  {
    %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
    ret i64 %r
  }
+
+; CHECK-LABEL: f_fmaxnmv
+; CHECK: fmaxnmv
+define float @f_fmaxnmv(<4 x float>* nocapture readonly %arr) {
+  %rdx.minmax.select  = load <4 x float>, <4 x float>* %arr
+  %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf
+  %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
+  %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1
+  %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
+  %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
+  %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
+  %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+  ret float %r
+}
+
+; CHECK-LABEL: f_fminnmv
+; CHECK: fminnmv
+define float @f_fminnmv(<4 x float>* nocapture readonly %arr) {
+  %rdx.minmax.select  = load <4 x float>, <4 x float>* %arr
+  %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf
+  %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
+  %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1
+  %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
+  %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
+  %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
+  %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+  ret float %r
+}
author	Jun Bum Lim <junbuml@codeaurora.org>
	Fri, 9 Oct 2015 14:11:25 +0000 (14:11 +0000)
committer	Jun Bum Lim <junbuml@codeaurora.org>
	Fri, 9 Oct 2015 14:11:25 +0000 (14:11 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch | blob | history
test/CodeGen/AArch64/aarch64-minmaxv.ll		patch | blob | history