From 2d32816a4542ba0f71c82d017a67a06a8580b513 Mon Sep 17 00:00:00 2001
From: Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Date: Fri, 31 Oct 2014 19:19:07 +0000
Subject: [PATCH] [PowerPC] Initial VSX intrinsic support, with min/max for
 vector double

Now that we have initial support for VSX, we can begin adding
intrinsics for programmer access to VSX instructions.  This patch adds
basic support for VSX intrinsics in general, and tests it by
implementing intrinsics for minimum and maximum for the vector double
data type.

The LLVM portion of this is quite straightforward.  There is a
companion patch for Clang.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220988 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsPowerPC.td | 51 +++++++++++++++
 lib/Target/PowerPC/PPCInstrVSX.td    | 24 +++++--
 test/CodeGen/PowerPC/vsx-minmax.ll   | 98 ++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+), 6 deletions(-)
 create mode 100644 test/CodeGen/PowerPC/vsx-minmax.ll
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 645e2844d23..5f8cda5e5f4 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -47,6 +47,13 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
                               list<IntrinsicProperty> properties>
     : GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
       Intrinsic<ret_types, param_types, properties>;
+
+  /// PowerPC_VSX_Intrinsic - Base class for all VSX intrinsics.
+  class PowerPC_VSX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+                              list<LLVMType> param_types,
+                              list<IntrinsicProperty> properties>
+    : GCCBuiltin<!strconcat("__builtin_vsx_", GCCIntSuffix)>,
+      Intrinsic<ret_types, param_types, properties>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -88,6 +95,32 @@ class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
                           [IntrNoMem]>;
 
 
+//===----------------------------------------------------------------------===//
+// PowerPC VSX Intrinsic Class Definitions.
+//
+
+/// PowerPC_VSX_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2f64
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_VSX_Vec_DDD_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+                          [llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_VSX_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_VSX_Vec_FFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_VSX_Sca_DDD_Intrinsic - A PowerPC intrinsic that takes two f64
+/// scalars and returns one.  These intrinsics have no side effects.
+class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
+  : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+                          [llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+                          [IntrNoMem]>;
+
+
 //===----------------------------------------------------------------------===//
 // PowerPC Altivec Intrinsic Definitions.
 
@@ -476,3 +509,21 @@ def int_ppc_altivec_vexptefp  : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
 def int_ppc_altivec_vlogefp   : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
 def int_ppc_altivec_vrefp     : PowerPC_Vec_FF_Intrinsic<"vrefp">;
 def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC VSX Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
+
+// Vector maximum.
+def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
+def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
+def int_ppc_vsx_xsmaxdp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmaxdp">;
+
+// Vector minimum.
+def int_ppc_vsx_xvmindp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmindp">;
+def int_ppc_vsx_xvminsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvminsp">;
+def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
+
+}
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index a7c95fb5f84..3dfdf0806e3 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -643,24 +643,36 @@ let Uses = [RM] in {
   let isCommutable = 1 in {
   def XSMAXDP : XX3Form<60, 160,
                         (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
-                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsfrc:$XT,
+                              (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
   def XSMINDP : XX3Form<60, 168,
                         (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
-                        "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xsmindp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsfrc:$XT,
+                              (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
 
   def XVMAXDP : XX3Form<60, 224,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
   def XVMINDP : XX3Form<60, 232,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmindp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
 
   def XVMAXSP : XX3Form<60, 192,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
   def XVMINSP : XX3Form<60, 200,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvminsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
   } // isCommutable
 } // Uses = [RM]
 
diff --git a/test/CodeGen/PowerPC/vsx-minmax.ll b/test/CodeGen/PowerPC/vsx-minmax.ll
new file mode 100644
index 00000000000..47f50abbc2a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-minmax.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -O0 -fast-isel=0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
+@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
+@d = global double 2.340000e+01, align 8
+@vf1 = common global <4 x float> zeroinitializer, align 16
+@vd1 = common global <2 x double> zeroinitializer, align 16
+@vf2 = common global <4 x float> zeroinitializer, align 16
+@vf3 = common global <4 x float> zeroinitializer, align 16
+@vd2 = common global <2 x double> zeroinitializer, align 16
+@vf4 = common global <4 x float> zeroinitializer, align 16
+@d1 = common global double 0.000000e+00, align 8
+@d2 = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @test1() #0 {
+; CHECK-LABEL: @test1
+entry:
+  %0 = load volatile <4 x float>* @vf, align 16
+  %1 = load volatile <4 x float>* @vf, align 16
+  %2 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %0, <4 x float> %1)
+; CHECK: xvmaxsp
+  store <4 x float> %2, <4 x float>* @vf1, align 16
+  %3 = load <2 x double>* @vd, align 16
+  %4 = tail call <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %3, <2 x double> %3)
+; CHECK: xvmaxdp
+  store <2 x double> %4, <2 x double>* @vd1, align 16
+  %5 = load volatile <4 x float>* @vf, align 16
+  %6 = load volatile <4 x float>* @vf, align 16
+  %7 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %5, <4 x float> %6)
+; CHECK: xvmaxsp
+  store <4 x float> %7, <4 x float>* @vf2, align 16
+  %8 = load volatile <4 x float>* @vf, align 16
+  %9 = load volatile <4 x float>* @vf, align 16
+  %10 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %8, <4 x float> %9)
+; CHECK: xvminsp
+  store <4 x float> %10, <4 x float>* @vf3, align 16
+  %11 = load <2 x double>* @vd, align 16
+  %12 = tail call <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %11, <2 x double> %11)
+; CHECK: xvmindp
+  store <2 x double> %12, <2 x double>* @vd2, align 16
+  %13 = load volatile <4 x float>* @vf, align 16
+  %14 = load volatile <4 x float>* @vf, align 16
+  %15 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %13, <4 x float> %14)
+; CHECK: xvminsp
+  store <4 x float> %15, <4 x float>* @vf4, align 16
+  %16 = load double* @d, align 8
+  %17 = tail call double @llvm.ppc.vsx.xsmaxdp(double %16, double %16)
+; CHECK: xsmaxdp
+  store double %17, double* @d1, align 8
+  %18 = tail call double @llvm.ppc.vsx.xsmindp(double %16, double %16)
+; CHECK: xsmindp
+  store double %18, double* @d2, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.ppc.vsx.xsmaxdp(double, double)
+
+; Function Attrs: nounwind readnone
+declare double @llvm.ppc.vsx.xsmindp(double, double)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double>, <2 x double>)
+
+; Generated from C source:
+
+; % clang -O1 -maltivec -mvsx -S -emit-llvm vsx-minmax.c
+;
+;volatile vector float vf = { -1.5, 2.5, -3.5, 4.5 };
+;vector double vd = { 3.5, -7.5 };
+;double d = 23.4;
+;
+;vector float vf1, vf2, vf3, vf4;
+;vector double vd1, vd2;
+;double d1, d2;
+;
+;void test1() {
+;  vf1 = vec_max(vf, vf);
+;  vd1 = vec_max(vd, vd);
+;  vf2 = vec_vmaxfp(vf, vf);
+;  vf3 = vec_min(vf, vf);
+;  vd2 = vec_min(vd, vd);
+;  vf4 = vec_vminfp(vf, vf);
+;  d1 = __builtin_vsx_xsmaxdp(d, d);
+;  d2 = __builtin_vsx_xsmindp(d, d);
+;}
-- 
2.34.1