From 9a2a305ed489f2cddfafbebbe280c3cfe7b835f0 Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Tue, 9 Dec 2014 16:52:29 +0000
Subject: [PATCH] [PowerPC 3/4] Little-endian adjustments for VSX vector shuffle

When performing instruction selection for ISD::VECTOR_SHUFFLE, there
is special code for handling v2f64 and v2i64 using VSX instructions.
This code must be adjusted for little-endian. Because the two inputs
are treated as a double-wide register, we must swap their order for
little endian. To get the appropriate mask elements to use with the
big-endian biased XXPERMDI instruction, we must reverse their order
and invert the bits.

A new test is added to test the 16 possible values of the shuffle
mask. It is initially disabled for reasons specified in the test. It
is re-enabled by patch 4/4.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223791 91177308-0d34-0410-b5e6-96231b3b80d8
---
NOTE(review): this section is ignored by git-am. The line structure of
this patch was restored from a whitespace-collapsed copy; indentation of
the C++ context lines and of the IR bodies is inferred from LLVM
conventions -- confirm against the target tree before applying.

 lib/Target/PowerPC/PPCISelDAGToDAG.cpp |   9 ++
 test/CodeGen/PowerPC/vsx_shuffle_le.ll | 212 +++++++++++++++++++++++++
 2 files changed, 221 insertions(+)
 create mode 100644 test/CodeGen/PowerPC/vsx_shuffle_le.ll

diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index e04d7e1bc35..595052ebc30 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1380,6 +1380,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         else
           DM[i] = 1;
 
+      // For little endian, we must swap the input operands and adjust
+      // the mask elements (reverse and invert them).
+      if (PPCSubTarget->isLittleEndian()) {
+        std::swap(Op1, Op2);
+        unsigned tmp = DM[0];
+        DM[0] = 1 - DM[1];
+        DM[1] = 1 - tmp;
+      }
+
       SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
 
       if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
new file mode 100644
index 00000000000..b3682b96f13
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -0,0 +1,212 @@
+; Note: This test is disabled until VSX is enabled for LE, as otherwise
+; we don't get the correct code gen.
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s
+; FIXME: Remove this and all above lines when VSX is enabled for LE.
+
+; R;UN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: test00
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 3
+}
+
+define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: test01
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test02
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 3
+}
+
+define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test03
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 1
+}
+
+define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test10
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test11
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 0
+}
+
+define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test12
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 2
+}
+
+define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test13
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 0
+}
+
+define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test20
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 3
+}
+
+define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test21
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 1
+}
+
+define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test22
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 3
+}
+
+define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test23
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test30
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 2
+}
+
+define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test31
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 0
+}
+
+define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test32
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test33
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 0
+}
-- 
2.34.1