From 3b45f263c3669c65a76f7033ee24093a9870cfcf Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 7 Dec 2015 14:33:34 +0000 Subject: [PATCH] VX-512: Fixed a bug in FP logic operation lowering FP logic instructions are supported in DQ extension on AVX-512 target. I use integer operations instead. Added tests. I also enabled FABS in this patch in order to check ANDPS. The operations are FOR, FXOR, FAND, FANDN. The instructions, that supported for 512-bit vector under DQ are: VORPS/PD, VXORPS/PD, VANDPS/PD, FANDNPS/PD. Differential Revision: http://reviews.llvm.org/D15110 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254913 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 55 +++++++++++++++-------- lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSSE.td | 2 +- test/CodeGen/X86/avx-logic.ll | 1 + test/CodeGen/X86/avx512-arith.ll | 71 ++++++++++++++++++++++++++++++ test/CodeGen/X86/vec_fabs.ll | 2 +- 6 files changed, 111 insertions(+), 21 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fa6f5c8be88..21bca74353c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1340,6 +1340,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); + setOperationAction(ISD::FABS, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); @@ -1347,6 +1348,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); + setOperationAction(ISD::FABS, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); @@ -26339,6 +26341,31 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT.is512BitVector() && !Subtarget->hasDQI()) { + // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention. + // These logic operations may be executed in the integer domain. + SDLoc dl(N); + MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); + + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); + unsigned IntOpcode = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected FP logic op"); + case X86ISD::FOR: IntOpcode = ISD::OR; break; + case X86ISD::FXOR: IntOpcode = ISD::XOR; break; + case X86ISD::FAND: IntOpcode = ISD::AND; break; + case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; + } + SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); + return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); + } + return SDValue(); +} /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -26354,19 +26381,7 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, if (C->getValueAPF().isPosZero()) return N->getOperand(0); - EVT VT = N->getValueType(0); - if (VT.is512BitVector() && !Subtarget->hasDQI()) { - SDLoc dl(N); - MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); - - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); - unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR; - SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); - return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); - } - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. @@ -26391,7 +26406,8 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { } /// Do target-specific dag combines on X86ISD::FAND nodes. -static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -26402,11 +26418,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes -static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -26417,7 +26434,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } static SDValue PerformBTCombine(SDNode *N, @@ -27233,8 +27250,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget); case X86ISD::FMIN: case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); - case X86ISD::FAND: return PerformFANDCombine(N, DAG); - case X86ISD::FANDN: return PerformFANDNCombine(N, DAG); + case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget); + case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ANY_EXTEND: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4a4ceaca88f..b412f8fb3ec 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -770,6 +770,7 @@ def HasVLX : Predicate<"Subtarget->hasVLX()">, AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">; def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; +def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a93240bd717..a545335dd5d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2906,7 +2906,7 @@ let isCodeGenOnly = 1 in { // Multiclass for vectors using the X86 logical operation aliases for FP. multiclass sse12_fp_packed_vector_logical_alias< bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll index a91fe7e0c52..e9e7d5aea27 100644 --- a/test/CodeGen/X86/avx-logic.ll +++ b/test/CodeGen/X86/avx-logic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { ; CHECK-LABEL: andpd256: diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index d7da77a5eb5..9220e4f269c 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s @@ -823,3 +824,73 @@ define <16 x float> @test_fxor(<16 x float> %a) { ret <16 x float>%res } +define <8 x float> @test_fxor_8f32(<8 x float> %a) { +; CHECK-LABEL: test_fxor_8f32: +; CHECK: ## BB#0: +; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: retq + %res = fsub <8 x float> , %a + ret <8 x float>%res +} + +define <8 x double> @fabs_v8f64(<8 x double> %p) +; AVX512F-LABEL: fabs_v8f64: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fabs_v8f64: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: fabs_v8f64: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: fabs_v8f64: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; SKX-LABEL: fabs_v8f64: +; SKX: ## BB#0: +; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq +{ + %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) + ret <8 x double> %t +} +declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) + +define <16 x float> @fabs_v16f32(<16 x float> %p) +; AVX512F-LABEL: fabs_v16f32: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fabs_v16f32: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: fabs_v16f32: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: fabs_v16f32: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; SKX-LABEL: fabs_v16f32: +; SKX: ## BB#0: +; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq +{ + %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) + ret <16 x float> %t +} +declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll index 960b5f27cf5..54f33b2bd22 100644 --- a/test/CodeGen/X86/vec_fabs.ll +++ b/test/CodeGen/X86/vec_fabs.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s - +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s define <2 x double> @fabs_v2f64(<2 x double> %p) { -- 2.34.1