[LegalizeVectorTypes] Allow single loads and stores for more short vectors

author Ulrich Weigand <ulrich.weigand@de.ibm.com>

Tue, 5 May 2015 19:32:57 +0000 (19:32 +0000)

committer Ulrich Weigand <ulrich.weigand@de.ibm.com>

Tue, 5 May 2015 19:32:57 +0000 (19:32 +0000)
author Ulrich Weigand <ulrich.weigand@de.ibm.com>
Tue, 5 May 2015 19:32:57 +0000 (19:32 +0000)
committer Ulrich Weigand <ulrich.weigand@de.ibm.com>
Tue, 5 May 2015 19:32:57 +0000 (19:32 +0000)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

index f9a0d2901a21937c32dc78da8c292f740e4d0083..9afbc5ef66eb99c9aa27eb35a580c86924b6c0dd 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3098,7 +3098,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
      unsigned MemVTWidth = MemVT.getSizeInBits();
      if (MemVT.getSizeInBits() <= WidenEltWidth)
        break;
-    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+    if ((Action == TargetLowering::TypeLegal ||
+         Action == TargetLowering::TypePromoteInteger) &&
+        (WidenWidth % MemVTWidth) == 0 &&
          isPowerOf2_32(WidenWidth / MemVTWidth) &&
          (MemVTWidth <= Width ||
           (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
diff --git a/test/CodeGen/SystemZ/vec-move-02.ll b/test/CodeGen/SystemZ/vec-move-02.ll

index e43676055fada66a66a098a76829fbd459faa67d..dcaf0acccb2fb454318bbc68eba06a5a1f9dd74b 100644 (file)
--- a/test/CodeGen/SystemZ/vec-move-02.ll
+++ b/test/CodeGen/SystemZ/vec-move-02.ll
@@ -109,3 +109,66 @@ define <16 x i8> @f11(i8 *%base, i64 %index) {
    %ret = load <16 x i8>, <16 x i8> *%ptr, align 1
    ret <16 x i8> %ret
  }
+
+; Test v2i8 loads.
+define <2 x i8> @f12(<2 x i8> *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x i8>, <2 x i8> *%ptr
+  ret <2 x i8> %ret
+}
+
+; Test v4i8 loads.
+define <4 x i8> @f13(<4 x i8> *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <4 x i8>, <4 x i8> *%ptr
+  ret <4 x i8> %ret
+}
+
+; Test v8i8 loads.
+define <8 x i8> @f14(<8 x i8> *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <8 x i8>, <8 x i8> *%ptr
+  ret <8 x i8> %ret
+}
+
+; Test v2i16 loads.
+define <2 x i16> @f15(<2 x i16> *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x i16>, <2 x i16> *%ptr
+  ret <2 x i16> %ret
+}
+
+; Test v4i16 loads.
+define <4 x i16> @f16(<4 x i16> *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <4 x i16>, <4 x i16> *%ptr
+  ret <4 x i16> %ret
+}
+
+; Test v2i32 loads.
+define <2 x i32> @f17(<2 x i32> *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x i32>, <2 x i32> *%ptr
+  ret <2 x i32> %ret
+}
+
+; Test v2f32 loads.
+define <2 x float> @f18(<2 x float> *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x float>, <2 x float> *%ptr
+  ret <2 x float> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-03.ll b/test/CodeGen/SystemZ/vec-move-03.ll

index 1b1f96163a06f5fab3b6a663b95599fa760da450..f40e2cb2bf281ce3600e00c7c8f8a33a344a996d 100644 (file)
--- a/test/CodeGen/SystemZ/vec-move-03.ll
+++ b/test/CodeGen/SystemZ/vec-move-03.ll
@@ -109,3 +109,66 @@ define void @f11(<16 x i8> %val, i8 *%base, i64 %index) {
    store <16 x i8> %val, <16 x i8> *%ptr, align 1
    ret void
  }
+
+; Test v2i8 stores.
+define void @f12(<2 x i8> %val, <2 x i8> *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vsteh %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x i8> %val, <2 x i8> *%ptr
+  ret void
+}
+
+; Test v4i8 stores.
+define void @f13(<4 x i8> %val, <4 x i8> *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vstef %v24, 0(%r2)
+; CHECK: br %r14
+  store <4 x i8> %val, <4 x i8> *%ptr
+  ret void
+}
+
+; Test v8i8 stores.
+define void @f14(<8 x i8> %val, <8 x i8> *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vsteg %v24, 0(%r2)
+; CHECK: br %r14
+  store <8 x i8> %val, <8 x i8> *%ptr
+  ret void
+}
+
+; Test v2i16 stores.
+define void @f15(<2 x i16> %val, <2 x i16> *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vstef %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x i16> %val, <2 x i16> *%ptr
+  ret void
+}
+
+; Test v4i16 stores.
+define void @f16(<4 x i16> %val, <4 x i16> *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vsteg %v24, 0(%r2)
+; CHECK: br %r14
+  store <4 x i16> %val, <4 x i16> *%ptr
+  ret void
+}
+
+; Test v2i32 stores.
+define void @f17(<2 x i32> %val, <2 x i32> *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x i32> %val, <2 x i32> *%ptr
+  ret void
+}
+
+; Test v2f32 stores.
+define void @f18(<2 x float> %val, <2 x float> *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x float> %val, <2 x float> *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-move-15.ll b/test/CodeGen/SystemZ/vec-move-15.ll

index bf375e111cc5a483314130e25b017b672e146a3a..503627c163c64278bb274a980b5b601e22666b33 100644 (file)
--- a/test/CodeGen/SystemZ/vec-move-15.ll
+++ b/test/CodeGen/SystemZ/vec-move-15.ll
@@ -71,8 +71,7 @@ define <2 x i64> @f7(<2 x i1> *%ptr) {
  ; Test a v2i8->v2i64 extension.
  define <2 x i64> @f8(<2 x i8> *%ptr) {
  ; CHECK-LABEL: f8:
-; CHECK: vlrepb [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vleb [[REG1]], 1(%r2), 1
+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
  ; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]]
  ; CHECK: vuphh [[REG3:%v[0-9]+]], [[REG2]]
  ; CHECK: vuphf %v24, [[REG3]]
diff --git a/test/CodeGen/SystemZ/vec-move-16.ll b/test/CodeGen/SystemZ/vec-move-16.ll

index 152b0d4d88c4bff60922021198558569b0ea0b72..cd25773968001d076c6fdeaa037a1c3caaf96a6f 100644 (file)
--- a/test/CodeGen/SystemZ/vec-move-16.ll
+++ b/test/CodeGen/SystemZ/vec-move-16.ll
@@ -71,8 +71,7 @@ define <2 x i64> @f7(<2 x i1> *%ptr) {
  ; Test a v2i8->v2i64 extension.
  define <2 x i64> @f8(<2 x i8> *%ptr) {
  ; CHECK-LABEL: f8:
-; CHECK: vlrepb [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vleb [[REG1]], 1(%r2), 1
+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
  ; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
  ; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]]
  ; CHECK: vuplhf %v24, [[REG3]]
diff --git a/test/CodeGen/SystemZ/vec-move-17.ll b/test/CodeGen/SystemZ/vec-move-17.ll

new file mode 100644 (file)

index 0000000..e7fc06c
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-17.ll
@@ -0,0 +1,104 @@
+; Test vector truncating stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8->v16i1 truncation.
+define void @f1(<16 x i8> %val, <16 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <16 x i8> %val to <16 x i1>
+  store <16 x i1> %trunc, <16 x i1> *%ptr
+  ret void
+}
+
+; Test a v8i16->v8i1 truncation.
+define void @f2(<8 x i16> %val, <8 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <8 x i16> %val to <8 x i1>
+  store <8 x i1> %trunc, <8 x i1> *%ptr
+  ret void
+}
+
+; Test a v8i16->v8i8 truncation.
+define void @f3(<8 x i16> %val, <8 x i8> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vpkh [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <8 x i16> %val to <8 x i8>
+  store <8 x i8> %trunc, <8 x i8> *%ptr
+  ret void
+}
+
+; Test a v4i32->v4i1 truncation.
+define void @f4(<4 x i32> %val, <4 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <4 x i32> %val to <4 x i1>
+  store <4 x i1> %trunc, <4 x i1> *%ptr
+  ret void
+}
+
+; Test a v4i32->v4i8 truncation.  At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f5(<4 x i32> %val, <4 x i8> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vstef [[REG]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <4 x i32> %val to <4 x i8>
+  store <4 x i8> %trunc, <4 x i8> *%ptr
+  ret void
+}
+
+; Test a v4i32->v4i16 truncation.
+define void @f6(<4 x i32> %val, <4 x i16> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vpkf [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <4 x i32> %val to <4 x i16>
+  store <4 x i16> %trunc, <4 x i16> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i1 truncation.
+define void @f7(<2 x i64> %val, <2 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <2 x i64> %val to <2 x i1>
+  store <2 x i1> %trunc, <2 x i1> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i8 truncation.  At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f8(<2 x i64> %val, <2 x i8> *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vsteh [[REG]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <2 x i64> %val to <2 x i8>
+  store <2 x i8> %trunc, <2 x i8> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i16 truncation.  At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f9(<2 x i64> %val, <2 x i16> *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vstef [[REG]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <2 x i64> %val to <2 x i16>
+  store <2 x i16> %trunc, <2 x i16> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i32 truncation.
+define void @f10(<2 x i64> %val, <2 x i32> *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vpkg [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <2 x i64> %val to <2 x i32>
+  store <2 x i32> %trunc, <2 x i32> *%ptr
+  ret void
+}
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Tue, 5 May 2015 19:32:57 +0000 (19:32 +0000)
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Tue, 5 May 2015 19:32:57 +0000 (19:32 +0000)
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp		patch | blob | history
test/CodeGen/SystemZ/vec-move-02.ll		patch | blob | history
test/CodeGen/SystemZ/vec-move-03.ll		patch | blob | history
test/CodeGen/SystemZ/vec-move-15.ll		patch | blob | history
test/CodeGen/SystemZ/vec-move-16.ll		patch | blob | history
test/CodeGen/SystemZ/vec-move-17.ll	[new file with mode: 0644]	patch | blob