From cd5bbd8bad58562dff9979bbba73b840443fe9e5 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 12 Jan 2015 20:31:30 +0000 Subject: [PATCH] [X86] Also create+widen FMIN/FMAX nodes for v2f32. This happens in the HINT benchmark, where the SLP-vectorizer created v2f32 fcmp/select code. The "correct" solution would have been to teach the vectorizer cost model that v2f32 isn't legal (because really, it isn't), but if we can vectorize we might as well do so. We legalize these v2f32 FMIN/FMAX nodes by widening to v4f32 later on. v3f32 were already widened to v4f32 by the generic unroll-and-build-vector legalization. rdar://15763436 Differential Revision: http://reviews.llvm.org/D6557 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225691 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++++- test/CodeGen/X86/sse-minmax.ll | 36 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 65440a0d22a..f455700d5ad 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19658,6 +19658,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); + // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. 
+ case X86ISD::FMINC: + case X86ISD::FMIN: + case X86ISD::FMAXC: + case X86ISD::FMAX: { + EVT VT = N->getValueType(0); + if (VT != MVT::v2f32) + llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX."); + SDValue UNDEF = DAG.getUNDEF(VT); + SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + N->getOperand(0), UNDEF); + SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + N->getOperand(1), UNDEF); + Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS)); + return; + } case ISD::SIGN_EXTEND_INREG: case ISD::ADDC: case ISD::ADDE: @@ -23077,8 +23093,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // instructions match the semantics of the common C idiom x<y?x:y but not // x<=y?x:y, because of how they handle negative zero (which can be // ignored in unsafe-math mode). + // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && TLI.isTypeLegal(VT) && + VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget->hasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 46f2b56dd93..4dcb54ca4b0 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -989,3 +989,39 @@ define <4 x float> @test_minps(<4 x float> %x, <4 x float> %y) nounwind { %min = select <4 x i1> %min_is_x, <4 x float> %x, <4 x float> %y ret <4 x float> %min } + +; UNSAFE-LABEL: test_maxps_illegal_v2f32: +; UNSAFE-NEXT: maxps %xmm1, %xmm0 +; UNSAFE-NEXT: ret +define <2 x float> @test_maxps_illegal_v2f32(<2 x float> %x, <2 x float> %y) nounwind { + %max_is_x = fcmp oge <2 x float> %x, %y + %max = select <2 x i1> %max_is_x, <2 x float> %x, <2 x float> %y + ret <2 x float> %max +} + +; UNSAFE-LABEL: test_minps_illegal_v2f32: +; UNSAFE-NEXT: minps %xmm1, %xmm0 +; UNSAFE-NEXT: ret +define <2 x float> @test_minps_illegal_v2f32(<2 x float> %x, <2 x float> %y) nounwind { + %min_is_x = fcmp ole <2 x float> %x, %y + %min = select <2 x i1> %min_is_x, <2 x float> %x, <2 x float> %y + ret <2 x float> %min +} + +; UNSAFE-LABEL: test_maxps_illegal_v3f32: +; UNSAFE-NEXT: maxps %xmm1, %xmm0 +; UNSAFE-NEXT: ret +define <3 x float> 
@test_maxps_illegal_v3f32(<3 x float> %x, <3 x float> %y) nounwind { + %max_is_x = fcmp oge <3 x float> %x, %y + %max = select <3 x i1> %max_is_x, <3 x float> %x, <3 x float> %y + ret <3 x float> %max +} + +; UNSAFE-LABEL: test_minps_illegal_v3f32: +; UNSAFE-NEXT: minps %xmm1, %xmm0 +; UNSAFE-NEXT: ret +define <3 x float> @test_minps_illegal_v3f32(<3 x float> %x, <3 x float> %y) nounwind { + %min_is_x = fcmp ole <3 x float> %x, %y + %min = select <3 x i1> %min_is_x, <3 x float> %x, <3 x float> %y + ret <3 x float> %min +} -- 2.34.1