From: Elena Demikhovsky Date: Tue, 22 Oct 2013 09:19:28 +0000 (+0000) Subject: AVX-512: aligned / unaligned load and store for 512-bit integer vectors. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d AVX-512: aligned / unaligned load and store for 512-bit integer vectors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193156 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 05e346dec5a..8cf5bb43717 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1067,23 +1067,6 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr SSEPackedDouble>, EVEX, EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; -// Use vmovaps/vmovups for AVX-512 integer load/store. -// 512-bit load/store -def : Pat<(alignedloadv8i64 addr:$src), - (VMOVAPSZrm addr:$src)>; -def : Pat<(loadv8i64 addr:$src), - (VMOVUPSZrm addr:$src)>; - -def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; -def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; - -def : Pat<(store (v8i64 VR512:$src), addr:$dst), - (VMOVUPDZmr addr:$dst, VR512:$src)>; -def : Pat<(store (v16i32 VR512:$src), addr:$dst), - (VMOVUPSZmr addr:$dst, VR512:$src)>; - let neverHasSideEffects = 1 in { def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), @@ -1115,25 +1098,36 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), } } -multiclass avx512_mov_int opc, string asm, RegisterClass RC, - RegisterClass KRC, +// 512-bit aligned load/store +def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>; +def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>; + +def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), + (VMOVDQA64mr addr:$dst, VR512:$src)>; +def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), + (VMOVDQA32mr addr:$dst, VR512:$src)>; + +multiclass avx512_mov_int load_opc, bits<8> store_opc, string asm, + RegisterClass RC, RegisterClass KRC, PatFrag ld_frag, X86MemOperand x86memop> { let neverHasSideEffects = 1 in - def rr : AVX512XSI, - EVEX; + def rr : AVX512XSI, EVEX; let canFoldAsLoad = 1 in - def rm : AVX512XSI, - EVEX; + def rm : AVX512XSI, EVEX; +let mayStore = 1 in + def mr : AVX512XSI, EVEX; let Constraints = "$src1 = $dst" in { - def rrk : AVX512XSI, EVEX, EVEX_K; - def rmk : AVX512XSI, +defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM, + memopv16i32, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, +defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM, + memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +// 512-bit unaligned load/store +def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>; +def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>; + +def : Pat<(store (v8i64 VR512:$src), addr:$dst), + (VMOVDQU64mr addr:$dst, VR512:$src)>; +def : Pat<(store (v16i32 VR512:$src), addr:$dst), + (VMOVDQU32mr addr:$dst, VR512:$src)>; + let AddedComplexity = 20 in { def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), (v16f32 VR512:$src2))), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c86b512b795..1fed424fd6e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -289,6 +289,7 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; // 512-bit load pattern fragments def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; +def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>; def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll index 6c5c586afd0..91242b1cc12 100644 --- a/test/CodeGen/X86/avx512-mov.ll +++ b/test/CodeGen/X86/avx512-mov.ll @@ -125,3 +125,31 @@ define <4 x i32> @test15(i32* %x) { %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res } + +; CHECK-LABEL: test16 +; CHECK: vmovdqu32 +; CHECK: ret +define <16 x i32> @test16(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x i32>* + %res = load <16 x i32>* %vaddr, align 1 + ret <16 x i32>%res +} + +; CHECK-LABEL: test17 +; CHECK: vmovdqa32 +; CHECK: ret +define <16 x i32> @test17(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x i32>* + %res = load <16 x i32>* %vaddr, align 64 + ret <16 x i32>%res +} + +; CHECK-LABEL: test18 +; CHECK: vmovdqa64 +; CHECK: ret +define void @test18(i8 * %addr, <8 x i64> %data) { + %vaddr = bitcast i8* %addr to <8 x i64>* + store <8 x i64>%data, <8 x i64>* %vaddr, align 64 + ret void +} +