From 1e5cdea9d7d8db6ffd6c97cf31e0e9e2765bfd5e Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 23 Jul 2009 02:22:41 +0000 Subject: [PATCH] Support insertps via the intrinsic and add a couple of simple testcases to make sure it's being generated. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@76843 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 16 ++++++++++++++-- test/CodeGen/X86/vec_insertps-1.ll | 13 +++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/vec_insertps-1.ll diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5d6ef36414a..2c9a064bd44 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3590,15 +3590,19 @@ let Constraints = "$src1 = $dst" in { defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +// insertps has a few different modes, there's the first two here below which +// are optimized inserts that won't zero arbitrary elements in the destination +// vector. The next one matches the intrinsic and could zero arbitrary elements +// in the target vector. let Constraints = "$src1 = $dst" in { multiclass SS41I_insertf32 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize; - def rm : SS4AIi8; +} + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; let Defs = [EFLAGS] in { diff --git a/test/CodeGen/X86/vec_insertps-1.ll b/test/CodeGen/X86/vec_insertps-1.ll new file mode 100644 index 00000000000..befd8974a27 --- /dev/null +++ b/test/CodeGen/X86/vec_insertps-1.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep insertps | count 2 + +define <4 x float> @t1(<4 x float> %t1, <4 x float> %t2) nounwind { + %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone + ret <4 x float> %tmp1 +} + +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + +define <4 x float> @t2(<4 x float> %t1, float %t2) nounwind { + %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0 + ret <4 x float> %tmp1 +} \ No newline at end of file -- 2.34.1