Allow more cases of undef shuffle indices and add tests for them.

author Bob Wilson <bob.wilson@apple.com>

Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
author Bob Wilson <bob.wilson@apple.com>
Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index b05c5dd25977fe126f67c07066822500005c5496..c2bd471faa11b048e83243c504b826c01b27f3cc 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3148,6 +3148,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
                         bool &ReverseVEXT, unsigned &Imm) {
    unsigned NumElts = VT.getVectorNumElements();
    ReverseVEXT = false;
+
+  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
+  if (M[0] < 0)
+    return false;
+
    Imm = M[0];
  
    // If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3163,6 +3168,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
        ReverseVEXT = true;
      }
  
+    if (M[i] < 0) continue; // ignore UNDEF indices
      if (ExpectedElt != static_cast<unsigned>(M[i]))
        return false;
    }
@@ -3188,13 +3194,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
  
    unsigned NumElts = VT.getVectorNumElements();
    unsigned BlockElts = M[0] + 1;
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSz;
  
    if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
      return false;
  
    for (unsigned i = 0; i < NumElts; ++i) {
-    if ((unsigned) M[i] !=
-        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
        return false;
    }
  
@@ -3210,8 +3219,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
    unsigned NumElts = VT.getVectorNumElements();
    WhichResult = (M[0] == 0 ? 0 : 1);
    for (unsigned i = 0; i < NumElts; i += 2) {
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + NumElts + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
        return false;
    }
    return true;
@@ -3229,9 +3238,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
    unsigned NumElts = VT.getVectorNumElements();
    WhichResult = (M[0] == 0 ? 0 : 1);
    for (unsigned i = 0; i < NumElts; i += 2) {
-    if (M[i] < 0) continue;
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
        return false;
    }
    return true;
@@ -3246,6 +3254,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
    unsigned NumElts = VT.getVectorNumElements();
    WhichResult = (M[0] == 0 ? 0 : 1);
    for (unsigned i = 0; i != NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
      if ((unsigned) M[i] != 2 * i + WhichResult)
        return false;
    }
@@ -3271,7 +3280,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
    for (unsigned j = 0; j != 2; ++j) {
      unsigned Idx = WhichResult;
      for (unsigned i = 0; i != Half; ++i) {
-      if ((unsigned) M[i + j * Half] != Idx)
+      int MIdx = M[i + j * Half];
+      if (MIdx >= 0 && (unsigned) MIdx != Idx)
          return false;
        Idx += 2;
      }
@@ -3294,8 +3304,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
    WhichResult = (M[0] == 0 ? 0 : 1);
    unsigned Idx = WhichResult * NumElts / 2;
    for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx + NumElts)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
        return false;
      Idx += 1;
    }
@@ -3320,8 +3330,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
    WhichResult = (M[0] == 0 ? 0 : 1);
    unsigned Idx = WhichResult * NumElts / 2;
    for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
        return false;
      Idx += 1;
    }
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll

index c11a67c6c4341f95273b985092b298bdb0d8f913..e460a84f6265f88e175977c040c79162a3ee781f 100644 (file)
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
         ret <4 x i32> %tmp3
  }
  
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd_undef:
+;CHECK: vext
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+       ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq_undef:
+;CHECK: vext
+       %tmp1 = load <16 x i8>* %A
+       %tmp2 = load <16 x i8>* %B
+       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
+       ret <16 x i8> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll

index deed554d842c6bb72f26f7cede3169a95ab2ab83..e1fe64b02d9d31c02113eec9143b02f12d05684d 100644 (file)
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
         ret <16 x i8> %tmp2
  }
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8_undef:
+;CHECK: vrev64.8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
+       ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16_undef:
+;CHECK: vrev32.16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+       ret <8 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll

index 10bb10ac24a176f494ea0e8d1ac7e2fcde1c265e..b1c2f93b47c6ca2934b636ceeaece5b7bac6592d 100644 (file)
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
          %tmp5 = fadd <4 x float> %tmp3, %tmp4
         ret <4 x float> %tmp5
  }
+
+; Undef shuffle indices should not prevent matching to VTRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8_undef:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+       ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16_undef:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = load <8 x i16>* %B
+       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+       ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll

index 6cef188d76dd4f0e63066d93a7ebeb661a38e691..9130f628919a550ac005248af18edf95116b74dd 100644 (file)
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
          %tmp5 = fadd <4 x float> %tmp3, %tmp4
         ret <4 x float> %tmp5
  }
+
+; Undef shuffle indices should not prevent matching to VUZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8_undef:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+       ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16_undef:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = load <8 x i16>* %B
+       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+       ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll

index a9ecdcab42d7e00e00918b4406cf773de90bda0d..926970aeb29b4f1a60d1255366a36683a4811a7b 100644 (file)
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
          %tmp5 = fadd <4 x float> %tmp3, %tmp4
         ret <4 x float> %tmp5
  }
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+       ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <16 x i8>* %A
+       %tmp2 = load <16 x i8>* %B
+       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+       %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
+        %tmp5 = add <16 x i8> %tmp3, %tmp4
+       ret <16 x i8> %tmp5
+}
+
author	Bob Wilson <bob.wilson@apple.com>
	Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/vext.ll		patch \| blob \| history
test/CodeGen/ARM/vrev.ll		patch \| blob \| history
test/CodeGen/ARM/vtrn.ll		patch \| blob \| history
test/CodeGen/ARM/vuzp.ll		patch \| blob \| history
test/CodeGen/ARM/vzip.ll		patch \| blob \| history