From d381a7a91ee6fb72cda3e08b71d602c4c6d83d35 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 16 Jun 2011 07:03:21 +0000 Subject: [PATCH] Add AVX suport for fpextend. Original patch by Syoyo Fujita with more comments by me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133153 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 19 +++++++++++++++++++ test/CodeGen/X86/avx-128.ll | 10 ++++++++++ 2 files changed, 29 insertions(+) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a38e3721f35..7774057d3da 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2063,6 +2063,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar as the "$src1 = $dst" +// constraint but without the tied operands. +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>, + Requires<[HasAVX, OptForSpeed]>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Load/Store XCSR register //===----------------------------------------------------------------------===// @@ -3589,6 +3598,16 @@ let Predicates = [HasSSE2] in def : Pat<(fextend (loadf32 addr:$src)), (CVTSS2SDrm addr:$src)>; +// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar as the "$src1 = $dst" +// constraint but without the tied operands. +let Predicates = [HasAVX] in + def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), + addr:$src)>; + // bit_convert let Predicates = [HasXMMInt] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll index 2bd3b5dfedd..c29cb5d36c3 100644 --- a/test/CodeGen/X86/avx-128.ll +++ b/test/CodeGen/X86/avx-128.ll @@ -10,3 +10,13 @@ entry: ret void } +define void @fpext() nounwind uwtable { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load float* %f, align 4 + ; CHECK: vcvtss2sd + %conv = fpext float %tmp to double + store double %conv, double* %d, align 8 + ret void +} -- 2.34.1