From 18e1185ddf0069498b1da9cbca758c0f6650b388 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 13 Nov 2014 11:46:16 +0000 Subject: [PATCH] AVX-512: SINT_TO_FP cost model and some bugfixes Checked some corner cases, for example translation of <8 x i1> to <8 x double> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221883 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 22 ++++++++-- lib/Target/X86/X86TargetTransformInfo.cpp | 7 +++ test/Analysis/CostModel/X86/sitofp.ll | 45 +++++++++++++++++++ test/CodeGen/X86/avx512-cvt.ll | 53 +++++++++++++++++++++++ 4 files changed, 123 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0f16f809ba1..47c8ce03296 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1410,6 +1410,10 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); @@ -13209,10 +13213,18 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { MVT SrcVT = Op.getOperand(0).getSimpleValueType(); + SDLoc dl(Op); - if (SrcVT.isVector()) + if (SrcVT.isVector()) { + if (SrcVT.getVectorElementType() == MVT::i1) { + MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); + return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, + Op.getOperand(0))); + } return SDValue(); - + } + assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -13225,7 +13237,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, return Op; } - SDLoc dl(Op); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); @@ -15455,8 +15466,11 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget if (NumElts != 8 && NumElts != 16) return SDValue(); - if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) + if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) { + if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT) + return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0)); return DAG.getNode(X86ISD::VSEXT, dl, VT, In); + } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index dc08a3b0e14..2b70fd0ecf8 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -618,6 +618,13 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, }; if (ST->hasAVX512()) { diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll index 338d9741652..edc937ecf94 100644 --- a/test/Analysis/CostModel/X86/sitofp.ll +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -1,4 +1,5 @@ ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) { ; SSE2: sitofpv2i8v2double @@ -279,3 +280,47 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) { %1 = sitofp <32 x i64> %a to <32 x float> ret <32 x float> %1 } + +; AVX512F-LABEL: sitofp_16i8_float +; AVX512F: cost of 2 {{.*}} sitofp +define <16 x float> @sitofp_16i8_float(<16 x i8> %a) { + %1 = sitofp <16 x i8> %a to <16 x float> + ret <16 x float> %1 +} + +define <16 x float> @sitofp_16i16_float(<16 x i16> %a) { + ; AVX512F-LABEL: sitofp_16i16_float + ; AVX512F: cost of 2 {{.*}} sitofp + %1 = sitofp <16 x i16> %a to <16 x float> + ret <16 x float> %1 +} + +; AVX512F-LABEL: sitofp_8i8_double +; AVX512F: cost of 2 {{.*}} sitofp +define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { + %1 = sitofp <8 x i8> %a to <8 x double> + ret <8 x double> %1 +} + +; AVX512F-LABEL: sitofp_8i16_double +; AVX512F: cost of 2 {{.*}} sitofp +define <8 x double> @sitofp_8i16_double(<8 x i16> %a) { + %1 = sitofp <8 x i16> %a to <8 x double> + ret <8 x double> %1 +} + +; AVX512F-LABEL: sitofp_8i1_double +; AVX512F: cost of 4 {{.*}} sitofp +define <8 x double> @sitofp_8i1_double(<8 x double> %a) { + %cmpres = fcmp ogt <8 x double> %a, zeroinitializer + %1 = sitofp <8 x i1> %cmpres to <8 x double> + ret <8 x double> %1 +} + +; AVX512F-LABEL: sitofp_16i1_float +; AVX512F: cost of 3 {{.*}} sitofp +define <16 x float> @sitofp_16i1_float(<16 x float> %a) { + %cmpres = fcmp ogt <16 x float> %a, zeroinitializer + %1 = sitofp <16 x i1> %cmpres to <16 x float> + ret <16 x float> %1 +} diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index f5cda96b99f..2b672a72d53 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -255,3 +255,56 @@ define double @uitofp03(i32 %a) nounwind { %b = uitofp i32 %a to double ret double %b } + +; CHECK-LABEL: @sitofp_16i1_float +; CHECK: vpbroadcastd +; CHECK: vcvtdq2ps +define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { + %mask = icmp slt <16 x i32> %a, zeroinitializer + %1 = sitofp <16 x i1> %mask to <16 x float> + ret <16 x float> %1 +} + +; CHECK-LABEL: @sitofp_16i8_float +; CHECK: vpmovsxbd +; CHECK: vcvtdq2ps +define <16 x float> @sitofp_16i8_float(<16 x i8> %a) { + %1 = sitofp <16 x i8> %a to <16 x float> + ret <16 x float> %1 +} + +; CHECK-LABEL: @sitofp_16i16_float +; CHECK: vpmovsxwd +; CHECK: vcvtdq2ps +define <16 x float> @sitofp_16i16_float(<16 x i16> %a) { + %1 = sitofp <16 x i16> %a to <16 x float> + ret <16 x float> %1 +} + +; CHECK-LABEL: @sitofp_8i16_double +; CHECK: vpmovsxwd +; CHECK: vcvtdq2pd +define <8 x double> @sitofp_8i16_double(<8 x i16> %a) { + %1 = sitofp <8 x i16> %a to <8 x double> + ret <8 x double> %1 +} + +; CHECK-LABEL: sitofp_8i8_double +; CHECK: vpmovzxwd +; CHECK: vpslld +; CHECK: vpsrad +; CHECK: vcvtdq2pd +define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { + %1 = sitofp <8 x i8> %a to <8 x double> + ret <8 x double> %1 +} + + +; CHECK-LABEL: @sitofp_8i1_double +; CHECK: vpbroadcastq +; CHECK: vcvtdq2pd +define <8 x double> @sitofp_8i1_double(<8 x double> %a) { + %cmpres = fcmp ogt <8 x double> %a, zeroinitializer + %1 = sitofp <8 x i1> %cmpres to <8 x double> + ret <8 x double> %1 +} -- 2.34.1