From d577552c668b744a995c41ff0b73c68eb30d7b93 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 16 Nov 2012 06:37:56 +0000
Subject: [PATCH] Use roundps/pd for llvm.ceil, llvm.trunc, llvm.rint, and
 llvm.nearbyint of vector types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168141 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  16 ++++
 lib/Target/X86/X86InstrSSE.td      |  52 +++++++++++
 test/CodeGen/X86/vec_floor.ll      | 144 +++++++++++++++++++++++++++++
 3 files changed, 212 insertions(+)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d276353cca6..80dd9ef613b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -978,7 +978,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
 
     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
     setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
 
     // FIXME: Do we need to handle scalar-to-vector here?
     setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1058,6 +1066,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
     setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
     setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
+    setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
     setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
     setOperationAction(ISD::FABS, MVT::v8f32, Custom);
 
@@ -1067,6 +1079,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
     setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+    setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
     setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
     setOperationAction(ISD::FABS, MVT::v4f64, Custom);
 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6f48d7ed7fe..229e8b263f9 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6378,12 +6378,47 @@ let Predicates = [HasAVX] in {
 
   def : Pat<(v4f32 (ffloor VR128:$src)),
             (VROUNDPSr VR128:$src, (i32 0x1))>;
+  def : Pat<(v4f32 (fnearbyint VR128:$src)),
+            (VROUNDPSr VR128:$src, (i32 0xC))>;
+  def : Pat<(v4f32 (fceil VR128:$src)),
+            (VROUNDPSr VR128:$src, (i32 0x2))>;
+  def : Pat<(v4f32 (frint VR128:$src)),
+            (VROUNDPSr VR128:$src, (i32 0x4))>;
+  def : Pat<(v4f32 (ftrunc VR128:$src)),
+            (VROUNDPSr VR128:$src, (i32 0x3))>;
+
   def : Pat<(v2f64 (ffloor VR128:$src)),
             (VROUNDPDr VR128:$src, (i32 0x1))>;
+  def : Pat<(v2f64 (fnearbyint VR128:$src)),
+            (VROUNDPDr VR128:$src, (i32 0xC))>;
+  def : Pat<(v2f64 (fceil VR128:$src)),
+            (VROUNDPDr VR128:$src, (i32 0x2))>;
+  def : Pat<(v2f64 (frint VR128:$src)),
+            (VROUNDPDr VR128:$src, (i32 0x4))>;
+  def : Pat<(v2f64 (ftrunc VR128:$src)),
+            (VROUNDPDr VR128:$src, (i32 0x3))>;
+
   def : Pat<(v8f32 (ffloor VR256:$src)),
             (VROUNDYPSr VR256:$src, (i32 0x1))>;
+  def : Pat<(v8f32 (fnearbyint VR256:$src)),
+            (VROUNDYPSr VR256:$src, (i32 0xC))>;
+  def : Pat<(v8f32 (fceil VR256:$src)),
+            (VROUNDYPSr VR256:$src, (i32 0x2))>;
+  def : Pat<(v8f32 (frint VR256:$src)),
+            (VROUNDYPSr VR256:$src, (i32 0x4))>;
+  def : Pat<(v8f32 (ftrunc VR256:$src)),
+            (VROUNDYPSr VR256:$src, (i32 0x3))>;
+
   def : Pat<(v4f64 (ffloor VR256:$src)),
             (VROUNDYPDr VR256:$src, (i32 0x1))>;
+  def : Pat<(v4f64 (fnearbyint VR256:$src)),
+            (VROUNDYPDr VR256:$src, (i32 0xC))>;
+  def : Pat<(v4f64 (fceil VR256:$src)),
+            (VROUNDYPDr VR256:$src, (i32 0x2))>;
+  def : Pat<(v4f64 (frint VR256:$src)),
+            (VROUNDYPDr VR256:$src, (i32 0x4))>;
+  def : Pat<(v4f64 (ftrunc VR256:$src)),
+            (VROUNDYPDr VR256:$src, (i32 0x3))>;
 }
 
 defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6417,8 +6452,25 @@ let Predicates = [UseSSE41] in {
 
   def : Pat<(v4f32 (ffloor VR128:$src)),
             (ROUNDPSr VR128:$src, (i32 0x1))>;
+  def : Pat<(v4f32 (fnearbyint VR128:$src)),
+            (ROUNDPSr VR128:$src, (i32 0xC))>;
+  def : Pat<(v4f32 (fceil VR128:$src)),
+            (ROUNDPSr VR128:$src, (i32 0x2))>;
+  def : Pat<(v4f32 (frint VR128:$src)),
+            (ROUNDPSr VR128:$src, (i32 0x4))>;
+  def : Pat<(v4f32 (ftrunc VR128:$src)),
+            (ROUNDPSr VR128:$src, (i32 0x3))>;
+
   def : Pat<(v2f64 (ffloor VR128:$src)),
             (ROUNDPDr VR128:$src, (i32 0x1))>;
+  def : Pat<(v2f64 (fnearbyint VR128:$src)),
+            (ROUNDPDr VR128:$src, (i32 0xC))>;
+  def : Pat<(v2f64 (fceil VR128:$src)),
+            (ROUNDPDr VR128:$src, (i32 0x2))>;
+  def : Pat<(v2f64 (frint VR128:$src)),
+            (ROUNDPDr VR128:$src, (i32 0x4))>;
+  def : Pat<(v2f64 (ftrunc VR128:$src)),
+            (ROUNDPDr VR128:$src, (i32 0x3))>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll
index 5e0160bd285..4db68bd1822 100644
--- a/test/CodeGen/X86/vec_floor.ll
+++ b/test/CodeGen/X86/vec_floor.ll
@@ -36,3 +36,147 @@ define <8 x float> @floor_v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+  ; CHECK: ceil_v2f64
+  ; CHECK: vroundpd
+  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+  ; CHECK: ceil_v4f32
+  ; CHECK: vroundps
+  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+  ; CHECK: ceil_v4f64
+  ; CHECK: vroundpd
+  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+  ; CHECK: ceil_v8f32
+  ; CHECK: vroundps
+  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+  ; CHECK: trunc_v2f64
+  ; CHECK: vroundpd
+  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+  ; CHECK: trunc_v4f32
+  ; CHECK: vroundps
+  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+  ; CHECK: trunc_v4f64
+  ; CHECK: vroundpd
+  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+  ; CHECK: trunc_v8f32
+  ; CHECK: vroundps
+  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+
+define <2 x double> @rint_v2f64(<2 x double> %p)
+{
+  ; CHECK: rint_v2f64
+  ; CHECK: vroundpd
+  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
+
+define <4 x float> @rint_v4f32(<4 x float> %p)
+{
+  ; CHECK: rint_v4f32
+  ; CHECK: vroundps
+  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
+
+define <4 x double> @rint_v4f64(<4 x double> %p)
+{
+  ; CHECK: rint_v4f64
+  ; CHECK: vroundpd
+  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
+
+define <8 x float> @rint_v8f32(<8 x float> %p)
+{
+  ; CHECK: rint_v8f32
+  ; CHECK: vroundps
+  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
+
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+  ; CHECK: nearbyint_v2f64
+  ; CHECK: vroundpd
+  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+  ; CHECK: nearbyint_v4f32
+  ; CHECK: vroundps
+  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+  ; CHECK: nearbyint_v4f64
+  ; CHECK: vroundpd
+  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+  ; CHECK: nearbyint_v8f32
+  ; CHECK: vroundps
+  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
-- 
2.34.1
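
A note on the immediate operands in the patterns above: bits 1:0 of the
(v)roundps/(v)roundpd immediate select the rounding mode (01 = toward
negative infinity for floor, 10 = toward positive infinity for ceil,
11 = toward zero for trunc), bit 2 overrides that field with the current
MXCSR rounding mode, and bit 3 suppresses the precision (inexact)
exception. That is why rint lowers to immediate 0x4 (current mode,
inexact may be raised) while nearbyint lowers to 0xC (current mode,
inexact suppressed), matching the C library distinction between rint()
and nearbyint(). As a rough cross-check, assuming an SSE4.1-capable
compiler, the same encodings are exposed in C through the smmintrin.h
macros; the helper names below are illustrative, not part of the patch:

    #include <smmintrin.h>   /* SSE4.1: _mm_round_ps/_mm_round_pd */

    /* The _MM_FROUND_* macros expand to the same immediates the
     * patterns above hand to (V)ROUNDPS/(V)ROUNDPD:
     *   _MM_FROUND_FLOOR     = 0x1  (round toward -inf)
     *   _MM_FROUND_CEIL      = 0x2  (round toward +inf)
     *   _MM_FROUND_TRUNC     = 0x3  (round toward zero)
     *   _MM_FROUND_RINT      = 0x4  (MXCSR mode, raises inexact)
     *   _MM_FROUND_NEARBYINT = 0xC  (MXCSR mode, inexact suppressed)
     */
    static inline __m128 ceil_ps(__m128 v)      { return _mm_round_ps(v, _MM_FROUND_CEIL); }
    static inline __m128 trunc_ps(__m128 v)     { return _mm_round_ps(v, _MM_FROUND_TRUNC); }
    static inline __m128 rint_ps(__m128 v)      { return _mm_round_ps(v, _MM_FROUND_RINT); }
    static inline __m128 nearbyint_ps(__m128 v) { return _mm_round_ps(v, _MM_FROUND_NEARBYINT); }

Since the rounding control is an immediate operand of the instruction,
each of the sixteen intrinsic/type combinations can be matched with a
single Pat<> and no new instruction definitions.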