From f1b790f7913a07ccc51f47a898ad1bed33bb614d Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Wed, 18 Jun 2014 16:51:10 +0000
Subject: [PATCH] [X86] AVX512: Add non-temporal stores

Note that I followed the AVX2 convention here and didn't add LLVM intrinsics
for stores. These can be generated with the nontemporal hint on LLVM IR
stores (see new test). The GCC builtins are lowered directly into nontemporal
stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211176 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrAVX512.td       | 29 ++++++++++++++++++++++++++
 test/CodeGen/X86/avx512-nontemporal.ll | 19 +++++++++++++++++
 test/MC/X86/avx512-encodings.s         | 12 +++++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 test/CodeGen/X86/avx512-nontemporal.ll

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 618f42abe94..7cac5ebbece 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1800,6 +1800,35 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
                               (int_x86_avx512_movntdqa addr:$src))]>,
                    EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
 
+// Prefer non-temporal over temporal versions
+let AddedComplexity = 400, SchedRW = [WriteStore] in {
+
+def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
+                            (ins f512mem:$dst, VR512:$src),
+                            "vmovntps\t{$src, $dst|$dst, $src}",
+                            [(alignednontemporalstore (v16f32 VR512:$src),
+                                                      addr:$dst)],
+                            IIC_SSE_MOVNT>,
+                  EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
+                            (ins f512mem:$dst, VR512:$src),
+                            "vmovntpd\t{$src, $dst|$dst, $src}",
+                            [(alignednontemporalstore (v8f64 VR512:$src),
+                                                      addr:$dst)],
+                            IIC_SSE_MOVNT>,
+                  EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+
+def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
+                           (ins i512mem:$dst, VR512:$src),
+                           "vmovntdq\t{$src, $dst|$dst, $src}",
+                           [(alignednontemporalstore (v8i64 VR512:$src),
+                                                     addr:$dst)],
+                           IIC_SSE_MOVNT>,
+                  EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - Integer arithmetic
 //
diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll
new file mode 100644
index 00000000000..ef50cdb8283
--- /dev/null
+++ b/test/CodeGen/X86/avx512-nontemporal.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
+
+define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
+; CHECK: vmovntps %z
+  %cast = bitcast i8* %B to <16 x float>*
+  %A2 = fadd <16 x float> %A, %AA
+  store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %z
+  %cast1 = bitcast i8* %B to <8 x i64>*
+  %E2 = add <8 x i64> %E, %EE
+  store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %z
+  %cast2 = bitcast i8* %B to <8 x double>*
+  %C2 = fadd <8 x double> %C, %CC
+  store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index a7a87cd0b40..b9674231d95 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -3163,3 +3163,15 @@ vmovntdqa (%r14,%rdx,2), %zmm18
 // CHECK: vmovntdqa
 // CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02]
 vmovntdqa 128(%r12,%rdx), %zmm23
+
+// CHECK: vmovntdq
+// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9]
+vmovntdq %zmm28, (%rcx,%r13,4)
+
+// CHECK: vmovntpd
+// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00]
+vmovntpd %zmm6, 4(%rdx)
+
+// CHECK: vmovntps
+// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00]
+vmovntps %zmm11, (%r13,%rcx,4)
-- 
2.34.1