From d345395ec97a303ffd420c3e761af7b9e3e4c338 Mon Sep 17 00:00:00 2001 From: Paul Redmond Date: Mon, 19 Aug 2013 20:01:35 +0000 Subject: [PATCH] Improve the widening of integral binary vector operations - split WidenVecRes_Binary into WidenVecRes_Binary and WidenVecRes_BinaryCanTrap - WidenVecRes_BinaryCanTrap preserves the original behaviour for operations that can trap - WidenVecRes_Binary simply widens the operation and improves codegen for 3-element vectors by allowing widening and promotion on x86 (matches the behaviour of unary and ternary operation widening) - use WidenVecRes_Binary for operations on integers. Reviewed by: nrotem git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 33 +++++++++++++------ test/CodeGen/X86/vsplit-and.ll | 2 +- test/CodeGen/X86/widen_arith-3.ll | 4 +-- test/CodeGen/X86/widen_load-2.ll | 8 ++--- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d5d230a49a0..97d28f0ff35 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -631,6 +631,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0637412cc1a..bf65319295b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1448,27 +1448,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); break; + case ISD::ADD: case ISD::AND: case ISD::BSWAP: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + case ISD::FADD: case ISD::FCOPYSIGN: - case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FREM: case ISD::FSUB: - case ISD::MUL: - case ISD::MULHS: - case ISD::MULHU: - case ISD::OR: + case ISD::FDIV: + case ISD::FREM: case ISD::SDIV: - case ISD::SREM: case ISD::UDIV: + case ISD::SREM: case ISD::UREM: - case ISD::SUB: - case ISD::XOR: - Res = WidenVecRes_Binary(N); + Res = WidenVecRes_BinaryCanTrap(N); break; case ISD::FPOWI: @@ -1537,6 +1541,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. unsigned Opcode = N->getOpcode(); SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll index 3b7fdff84e3..c16b2949358 100644 --- a/test/CodeGen/X86/vsplit-and.ll +++ b/test/CodeGen/X86/vsplit-and.ll @@ -14,7 +14,7 @@ define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind read define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly { ; CHECK: t2 -; CHECK-NOT: pand +; CHECK: pand ; CHECK: ret %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index d86042a4480..cf6346bfe28 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -1,7 +1,5 @@ ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s -; CHECK: incl -; CHECK: incl -; CHECK: incl +; CHECK: paddd ; Widen a v3i16 to v8i16 to do a vector add diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index f0f94e47921..2f203498fdb 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -73,9 +73,7 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { ; CHECK: add3i16 %i16vec3 = type <3 x i16> define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind { -; CHECK: addl -; CHECK: addl -; CHECK: addl +; CHECK: paddd ; CHECK: ret %a = load %i16vec3* %ap, align 16 %b = load %i16vec3* %bp, align 16 @@ -135,9 +133,7 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* ; CHECK: add3i8 %i8vec3 = type <3 x i8> define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind { -; CHECK: addb -; CHECK: addb -; CHECK: addb +; CHECK: paddd ; CHECK: ret %a = load %i8vec3* %ap, align 16 %b = load %i8vec3* %bp, align 16 -- 2.34.1