[PowerPC] Enable use of lxvw4x/stxvw4x in VSX code generation

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Fri, 17 Oct 2014 15:13:38 +0000 (15:13 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Fri, 17 Oct 2014 15:13:38 +0000 (15:13 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Fri, 17 Oct 2014 15:13:38 +0000 (15:13 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Fri, 17 Oct 2014 15:13:38 +0000 (15:13 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index b29674d992bb5e05e41e281e96315ab660ac0f6e..2808b41671d11f84af94b85142b740a147d77daa 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8312,6 +8312,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
      if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
          TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        // P8 and later hardware should keep the under-aligned LOAD as-is.
+        !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
          (VT == MVT::v16i8 || VT == MVT::v8i16 ||
           VT == MVT::v4i32 || VT == MVT::v4f32) &&
          LD->getAlignment() < ABIAlignment) {
@@ -9204,7 +9206,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
  
    if (VT.getSimpleVT().isVector()) {
      if (Subtarget.hasVSX()) {
-      if (VT != MVT::v2f64 && VT != MVT::v2i64)
+      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+          VT != MVT::v4f32 && VT != MVT::v4i32)
          return false;
      } else {
        return false;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td

index f30e64f95d1757cb1728d6fa40b6ee5cf0d7abce..4b2f5a321f803d048de37db453e890b284fe93e2 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -63,7 +63,8 @@ let Uses = [RM] in {
  
      def LXVW4X : XX1Form<31, 780,
                           (outs vsrc:$XT), (ins memrr:$src),
-                         "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+                         "lxvw4x $XT, $src", IIC_LdStLFD,
+                         [(set v4i32:$XT, (load xoaddr:$src))]>;
    }
  
    // Store indexed instructions
@@ -80,7 +81,8 @@ let Uses = [RM] in {
  
      def STXVW4X : XX1Form<31, 908,
                           (outs), (ins vsrc:$XT, memrr:$dst),
-                         "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+                         "stxvw4x $XT, $dst", IIC_LdStSTFD,
+                         [(store v4i32:$XT, xoaddr:$dst)]>;
    }
  
    // Add/Mul Instructions
@@ -811,6 +813,13 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
  def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
            (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
  
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+          (STXVW4X $rS, xoaddr:$dst)>;
+
  } // AddedComplexity
  } // HasVSX
  
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll

index 9f29e31cb9024deec16b18b248bb8763df6ea6dc..e911099111618b7e1394e3aa482ad12532269a5d 100644 (file)
--- a/test/CodeGen/PowerPC/unal4-std.ll
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=-vsx| FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
  
@@ -22,6 +23,9 @@ if.end210:                                        ; preds = %entry
  ; a multiple of 4).
  ; CHECK: @copy_to_conceal
  ; CHECK: stdx {{[0-9]+}}, 0,
+
+; CHECK-VSX: @copy_to_conceal
+; CHECK-VSX: stxvw4x {{[0-9]+}}, 0,
  }
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll

index 0c59516f1186728d3bba44776bf63895030b7967..64c03cdda35e70f240c5a3f7795ab19557e0296f 100644 (file)
--- a/test/CodeGen/PowerPC/unaligned.ll
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -92,10 +92,14 @@ entry:
  ; CHECK-DAG: stdx
  ; CHECK: stdx
  
+; For VSX on P7, unaligned loads and stores beat going through aligned stack
+; slots, but for loads lvsl/lvx/vperm is better still (on P8, lxvw4x is
+; preferable).  For stores, unaligned stxvw4x is preferable on both machines.
  ; CHECK-VSX: @foo6
-; CHECK-VSX-DAG: ld
-; CHECK-VSX-DAG: ld
-; CHECK-VSX-DAG: stdx
-; CHECK-VSX: stdx
+; CHECK-VSX-DAG: lvsl
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX: vperm
+; CHECK-VSX: stxvw4x
  }
  
diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll

index 3239cf6c06ab649bf7c42c1c1277302795c2fdd9..5075ff2b8c07964c319f798edd34d5b4ab3ffb71 100644 (file)
--- a/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
  
@@ -16,6 +17,10 @@ entry:
  ; CHECK-LABEL: @test1
  ; CHECK: stvx 2,
  ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test1
+; CHECK-VSX: stxvw4x 34,
+; CHECK-VSX: blr
  }
  
  ; Function Attrs: nounwind
@@ -35,6 +40,13 @@ entry:
  ; CHECK: addi [[REGB:[0-9]+]], 1, 112
  ; CHECK: lvx 2, [[REGB]], [[REG16]]
  ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test2
+; CHECK-VSX: ld {{[0-9]+}}, 112(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
  }
  
  ; Function Attrs: nounwind
@@ -54,6 +66,13 @@ entry:
  ; CHECK: addi [[REGB:[0-9]+]], 1, 128
  ; CHECK: lvx 2, [[REGB]], [[REG16]]
  ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test3
+; CHECK-VSX: ld {{[0-9]+}}, 128(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
  }
  
  attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll

index 304a84d49a9da82dc3cddace2b7172ba05cc5558..73a4a4d395daf4227871ae6af2d099af0ecdfa18 100644 (file)
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -mattr=-power8-vector | FileCheck %s
  ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
  
  target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll

index c3d1bf8f1ead2ad201dac41b6f0df2cc430110d3..b990442aed87545dbc59596188a41c4dc436205c 100644 (file)
--- a/test/CodeGen/PowerPC/vrspill.ll
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -1,5 +1,7 @@
-; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs -fast-isel=false < %s | FileCheck -check-prefix=CHECK-VSX %s
  
  ; This verifies that we generate correct spill/reload code for vector regs.
  
@@ -15,4 +17,9 @@ entry:
  
  ; CHECK: stvx 2,
  
+; We would prefer to test for "stxvw4x 34," but current -O0 code
+; needlessly generates "vor 3,2,2 / stxvw4x 35,0,3", so we'll settle for
+; the opcode.
+; CHECK-VSX: stxvw4x
+
  declare void @foo(i32*)
diff --git a/test/CodeGen/PowerPC/vsx-p8.ll b/test/CodeGen/PowerPC/vsx-p8.ll

new file mode 100644 (file)

index 0000000..81406b6
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-p8.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Unaligned loads/stores on P8 and later should use VSX where possible.
+
+define <2 x double> @test28u(<2 x double>* %a) {
+  %v = load <2 x double>* %a, align 8
+  ret <2 x double> %v
+
+; CHECK-LABEL: @test28u
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test29u(<2 x double>* %a, <2 x double> %b) {
+  store <2 x double> %b, <2 x double>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test29u
+; CHECK: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll

index 2f226e1f614cc523f31a6f61fa2d30f06be08d92..65343f4a9ba66b00fec03b335b9fb4bc333bb7df 100644 (file)
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -356,6 +356,63 @@ define void @test31(<2 x i64>* %a, <2 x i64> %b) {
  ; CHECK: blr
  }
  
+define <4 x float> @test32(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 16
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 16
+  ret void
+
+; CHECK-LABEL: @test33
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK-DAG: lvsl
+; CHECK-DAG: lvx
+; CHECK-DAG: lvx
+; CHECK: vperm 2,
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x i32> @test34(<4 x i32>* %a) {
+  %v = load <4 x i32>* %a, align 16
+  ret <4 x i32> %v
+
+; CHECK-LABEL: @test34
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test35(<4 x i32>* %a, <4 x i32> %b) {
+  store <4 x i32> %b, <4 x i32>* %a, align 16
+  ret void
+
+; CHECK-LABEL: @test35
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
  define <2 x double> @test40(<2 x i64> %a) {
    %v = uitofp <2 x i64> %a to <2 x double>
    ret <2 x double> %v
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Fri, 17 Oct 2014 15:13:38 +0000 (15:13 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Fri, 17 Oct 2014 15:13:38 +0000 (15:13 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
lib/Target/PowerPC/PPCInstrVSX.td		patch | blob | history
test/CodeGen/PowerPC/unal4-std.ll		patch | blob | history
test/CodeGen/PowerPC/unaligned.ll		patch | blob | history
test/CodeGen/PowerPC/vec-abi-align.ll		patch | blob | history
test/CodeGen/PowerPC/vec_misaligned.ll		patch | blob | history
test/CodeGen/PowerPC/vrspill.ll		patch | blob | history
test/CodeGen/PowerPC/vsx-p8.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/PowerPC/vsx.ll		patch | blob | history