ARM NEON: Handle v16i8 and v8i16 reverse shuffles

author Arnold Schwaighofer <aschwaighofer@apple.com>

Tue, 12 Feb 2013 01:58:32 +0000 (01:58 +0000)

committer Arnold Schwaighofer <aschwaighofer@apple.com>

Tue, 12 Feb 2013 01:58:32 +0000 (01:58 +0000)
author Arnold Schwaighofer <aschwaighofer@apple.com>
Tue, 12 Feb 2013 01:58:32 +0000 (01:58 +0000)
committer Arnold Schwaighofer <aschwaighofer@apple.com>
Tue, 12 Feb 2013 01:58:32 +0000 (01:58 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 4c9d2da2edf17ea99265167beb793cffdbd329ec..9d7a37929a9112289d065cdb40a00f5f7510ed0e 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
    return true;
  }
  
+/// \return true if this is a reverse operation on an vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+  unsigned NumElts = VT.getVectorNumElements();
+  // Make sure the mask has the right size.
+  if (NumElts != M.size())
+      return false;
+
+  // Look for <15, ..., 3, -1, 1, 0>.
+  for (unsigned i = 0; i != NumElts; ++i)
+    if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+      return false;
+
+  return true;
+}
+
  // If N is an integer constant that can be moved into a register in one
  // instruction, return an SDValue of such a constant (will become a MOV
  // instruction).  Otherwise return null.
@@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
            isVZIPMask(M, VT, WhichResult) ||
            isVTRN_v_undef_Mask(M, VT, WhichResult) ||
            isVUZP_v_undef_Mask(M, VT, WhichResult) ||
-          isVZIP_v_undef_Mask(M, VT, WhichResult));
+          isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+          ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
  }
  
  /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
                                   &VTBLMask[0], 8));
  }
  
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+                                                      SelectionDAG &DAG) {
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue OpLHS = Op.getOperand(0);
+  EVT VT = OpLHS.getValueType();
+
+  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+         "Expect an v8i16/v16i8 type");
+  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+  // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+  // extract the first 8 bytes into the top double word and the last 8 bytes
+  // into the bottom double word. The v8i16 case is similar.
+  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+                     DAG.getConstant(ExtractNum, MVT::i32));
+}
+
  static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
    SDValue V1 = Op.getOperand(0);
    SDValue V2 = Op.getOperand(1);
@@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
      return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
  
+  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+    return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
    if (VT == MVT::v8i8) {
      SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
      if (NewOp.getNode())
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll

index a38a0feae042997a5cd815954bdbd1f738a3fa8a..42964deb0b5e51a0fc2951faf8634e40a44caed4 100644 (file)
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -133,3 +133,30 @@ define i16 @foldBuildVectors() {
    %3 = extractelement <8 x i16> %2, i32 0
    ret i16 %3
  }
+
+; Test that we are generating vrev and vext for reverse shuffles of v8i16
+; shuffles.
+; CHECK: reverse_v8i16
+define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
+  %v0 = load <8 x i16>* %loadaddr
+  ; CHECK: vrev64.16
+  ; CHECK: vext.16
+  %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
+              <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <8 x i16> %v1, <8 x i16>* %storeaddr
+  ret void
+}
+
+; Test that we are generating vrev and vext for reverse shuffles of v16i8
+; shuffles.
+; CHECK: reverse_v16i8
+define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
+  %v0 = load <16 x i8>* %loadaddr
+  ; CHECK: vrev64.8
+  ; CHECK: vext.8
+  %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
+       <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
+                   i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <16 x i8> %v1, <16 x i8>* %storeaddr
+  ret void
+}
author	Arnold Schwaighofer <aschwaighofer@apple.com>
	Tue, 12 Feb 2013 01:58:32 +0000 (01:58 +0000)
committer	Arnold Schwaighofer <aschwaighofer@apple.com>
	Tue, 12 Feb 2013 01:58:32 +0000 (01:58 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/vector-DAGCombine.ll		patch \| blob \| history