Add an ARM-specific DAG combine to avoid redundant VDUPLANE nodes.

author Bob Wilson <bob.wilson@apple.com>

Wed, 14 Jul 2010 01:22:12 +0000 (01:22 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Wed, 14 Jul 2010 01:22:12 +0000 (01:22 +0000)
author Bob Wilson <bob.wilson@apple.com>
Wed, 14 Jul 2010 01:22:12 +0000 (01:22 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Wed, 14 Jul 2010 01:22:12 +0000 (01:22 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 762e9b0e9f2958b2065e645f716daa808f81f312..9e9192bc8a07c2feb0dca9eaa597e07d7e6c2562 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4228,6 +4228,34 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
    return SDValue();
  }
  
+/// PerformVDUPLANECombine - Target-specific dag combine for ARMISD::VDUPLANE:
+/// replace N with a bit_convert of its source when that is a usable VMOVIMM.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI) {
+  // If the source is already a VMOVIMM splat, the VDUPLANE is redundant.
+  SDValue Op = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // Look through any chain of bit_converts to reach the underlying source.
+  while (Op.getOpcode() == ISD::BIT_CONVERT)
+    Op = Op.getOperand(0);
+  if (Op.getOpcode() != ARMISD::VMOVIMM)
+    return SDValue();
+
+  // Bail out if the VMOV element size is bigger than the VDUPLANE elements.
+  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+  // The canonical zero VMOV uses 32-bit elements; a zero is treated as 8-bit.
+  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  unsigned EltBits;
+  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+    EltSize = 8;
+  if (EltSize > VT.getVectorElementType().getSizeInBits())
+    return SDValue();
+
+  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+  return DCI.CombineTo(N, Res, false);
+}
+
  /// getVShiftImm - Check if this is a valid build_vector for the immediate
  /// operand of a vector shift operation, where all the elements of the
  /// build_vector must have the same constant integer value.
@@ -4606,6 +4634,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::SUB:        return PerformSUBCombine(N, DCI);
    case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
    case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
    case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
    case ISD::SHL:
    case ISD::SRA:
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll

index 50e4df9f57c5d28cc042812e821c76368b8c6aca..a545f6c03d5b4c53d71740eaabadbe30f5818810 100644 (file)
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -267,3 +267,15 @@ entry:
    %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
    ret <2 x double> %0
  }
+
+; Radar 7373643: a constant splat should need only vmov.i8, with no vdup.8.
+;CHECK: redundantVdup:
+;CHECK: vmov.i8
+;CHECK-NOT: vdup.8
+;CHECK: vstr.64
+define void @redundantVdup(<8 x i8>* %ptr) nounwind {
+  %1 = insertelement <8 x i8> undef, i8 -128, i32 0
+  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  store <8 x i8> %2, <8 x i8>* %ptr, align 8
+  ret void
+}
author	Bob Wilson <bob.wilson@apple.com>
	Wed, 14 Jul 2010 01:22:12 +0000 (01:22 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Wed, 14 Jul 2010 01:22:12 +0000 (01:22 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/vdup.ll		patch \| blob \| history