From ec4188bad7f8ed75dbd9f592fe34b1c9502ca250 Mon Sep 17 00:00:00 2001 From: Robert Khasanov Date: Wed, 6 Aug 2014 15:40:34 +0000 Subject: [PATCH] [AVX512] Added load/store instructions to Register2Memory opcode tables. Added lowering tests for load/store. Reviewed by Elena Demikhovsky git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214972 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 16 +- test/CodeGen/X86/avx512-mov.ll | 293 ++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 36ee4a2d26b..0d46f706906 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -377,7 +377,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, // AVX-512 foldable instructions - { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE } + { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, + { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { @@ -601,10 +609,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, + { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 }, { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 }, { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 }, { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 }, { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 }, + { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 }, + { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, @@ -612,7 +624,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 }, - { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }, + { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll index 6d80c994e10..93875e839e2 100644 --- a/test/CodeGen/X86/avx512-mov.ll +++ b/test/CodeGen/X86/avx512-mov.ll @@ -152,3 +152,296 @@ define void @test18(i8 * %addr, <8 x i64> %data) { store <8 x i64>%data, <8 x i64>* %vaddr, align 64 ret void } + +; CHECK-LABEL: test19 +; CHECK: vmovdqu32 +; CHECK: ret +define void @test19(i8 * %addr, <16 x i32> %data) { + %vaddr = bitcast i8* %addr to <16 x i32>* + store <16 x i32>%data, <16 x i32>* %vaddr, align 1 + ret void +} + +; CHECK-LABEL: test20 +; CHECK: vmovdqa32 +; CHECK: ret +define void @test20(i8 * %addr, <16 x i32> %data) { + %vaddr = bitcast i8* %addr to <16 x i32>* + store <16 x i32>%data, <16 x i32>* %vaddr, align 64 + ret void +} + +; CHECK-LABEL: test21 +; CHECK: vmovdqa64 +; CHECK: ret +define <8 x i64> @test21(i8 * %addr) { + %vaddr = bitcast i8* %addr to <8 x i64>* + %res = load <8 x i64>* %vaddr, align 64 + ret <8 x i64>%res +} + +; CHECK-LABEL: test22 +; CHECK: vmovdqu64 +; CHECK: ret +define void @test22(i8 * %addr, <8 x i64> %data) { + %vaddr = bitcast i8* %addr to <8 x i64>* + store <8 x i64>%data, <8 x i64>* %vaddr, align 1 + ret void +} + +; CHECK-LABEL: test23 +; CHECK: vmovdqu64 +; CHECK: ret +define <8 x i64> @test23(i8 * %addr) { + %vaddr = bitcast i8* %addr to <8 x i64>* + %res = load <8 x i64>* %vaddr, align 1 + ret <8 x i64>%res +} + +; CHECK-LABEL: test24 +; CHECK: vmovapd +; CHECK: ret +define void @test24(i8 * %addr, <8 x double> %data) { + %vaddr = bitcast i8* %addr to <8 x double>* + store <8 x double>%data, <8 x double>* %vaddr, align 64 + ret void +} + +; CHECK-LABEL: test25 +; CHECK: vmovapd +; CHECK: ret +define <8 x double> @test25(i8 * %addr) { + %vaddr = bitcast i8* %addr to <8 x double>* + %res = load <8 x double>* %vaddr, align 64 + ret <8 x double>%res +} + +; CHECK-LABEL: test26 +; CHECK: vmovaps +; CHECK: ret +define void @test26(i8 * %addr, <16 x float> %data) { + %vaddr = bitcast i8* %addr to <16 x float>* + store <16 x float>%data, <16 x float>* %vaddr, align 64 + ret void +} + +; CHECK-LABEL: test27 +; CHECK: vmovaps +; CHECK: ret +define <16 x float> @test27(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x float>* + %res = load <16 x float>* %vaddr, align 64 + ret <16 x float>%res +} + +; CHECK-LABEL: test28 +; CHECK: vmovupd +; CHECK: ret +define void @test28(i8 * %addr, <8 x double> %data) { + %vaddr = bitcast i8* %addr to <8 x double>* + store <8 x double>%data, <8 x double>* %vaddr, align 1 + ret void +} + +; CHECK-LABEL: test29 +; CHECK: vmovupd +; CHECK: ret +define <8 x double> @test29(i8 * %addr) { + %vaddr = bitcast i8* %addr to <8 x double>* + %res = load <8 x double>* %vaddr, align 1 + ret <8 x double>%res +} + +; CHECK-LABEL: test30 +; CHECK: vmovups +; CHECK: ret +define void @test30(i8 * %addr, <16 x float> %data) { + %vaddr = bitcast i8* %addr to <16 x float>* + store <16 x float>%data, <16 x float>* %vaddr, align 1 + ret void +} + +; CHECK-LABEL: test31 +; CHECK: vmovups +; CHECK: ret +define <16 x float> @test31(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x float>* + %res = load <16 x float>* %vaddr, align 1 + ret <16 x float>%res +} + +; CHECK-LABEL: test32 +; CHECK: vmovdqa32{{.*{%k[1-7]} }} +; CHECK: ret +define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x i32>* + %r = load <16 x i32>* %vaddr, align 64 + %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old + ret <16 x i32>%res +} + +; CHECK-LABEL: test33 +; CHECK: vmovdqu32{{.*{%k[1-7]} }} +; CHECK: ret +define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x i32>* + %r = load <16 x i32>* %vaddr, align 1 + %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old + ret <16 x i32>%res +} + +; CHECK-LABEL: test34 +; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x i32>* + %r = load <16 x i32>* %vaddr, align 64 + %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer + ret <16 x i32>%res +} + +; CHECK-LABEL: test35 +; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x i32>* + %r = load <16 x i32>* %vaddr, align 1 + %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer + ret <16 x i32>%res +} + +; CHECK-LABEL: test36 +; CHECK: vmovdqa64{{.*{%k[1-7]} }} +; CHECK: ret +define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x i64>* + %r = load <8 x i64>* %vaddr, align 64 + %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old + ret <8 x i64>%res +} + +; CHECK-LABEL: test37 +; CHECK: vmovdqu64{{.*{%k[1-7]} }} +; CHECK: ret +define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x i64>* + %r = load <8 x i64>* %vaddr, align 1 + %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old + ret <8 x i64>%res +} + +; CHECK-LABEL: test38 +; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x i64>* + %r = load <8 x i64>* %vaddr, align 64 + %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer + ret <8 x i64>%res +} + +; CHECK-LABEL: test39 +; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) { + %mask = icmp ne <8 x i64> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x i64>* + %r = load <8 x i64>* %vaddr, align 1 + %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer + ret <8 x i64>%res +} + +; CHECK-LABEL: test40 +; CHECK: vmovaps{{.*{%k[1-7]} }} +; CHECK: ret +define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { + %mask = fcmp one <16 x float> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x float>* + %r = load <16 x float>* %vaddr, align 64 + %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old + ret <16 x float>%res +} + +; CHECK-LABEL: test41 +; CHECK: vmovups{{.*{%k[1-7]} }} +; CHECK: ret +define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { + %mask = fcmp one <16 x float> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x float>* + %r = load <16 x float>* %vaddr, align 1 + %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old + ret <16 x float>%res +} + +; CHECK-LABEL: test42 +; CHECK: vmovaps{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) { + %mask = fcmp one <16 x float> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x float>* + %r = load <16 x float>* %vaddr, align 64 + %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer + ret <16 x float>%res +} + +; CHECK-LABEL: test43 +; CHECK: vmovups{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) { + %mask = fcmp one <16 x float> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <16 x float>* + %r = load <16 x float>* %vaddr, align 1 + %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer + ret <16 x float>%res +} + +; CHECK-LABEL: test44 +; CHECK: vmovapd{{.*{%k[1-7]} }} +; CHECK: ret +define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { + %mask = fcmp one <8 x double> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x double>* + %r = load <8 x double>* %vaddr, align 64 + %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old + ret <8 x double>%res +} + +; CHECK-LABEL: test45 +; CHECK: vmovupd{{.*{%k[1-7]} }} +; CHECK: ret +define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { + %mask = fcmp one <8 x double> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x double>* + %r = load <8 x double>* %vaddr, align 1 + %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old + ret <8 x double>%res +} + +; CHECK-LABEL: test46 +; CHECK: vmovapd{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) { + %mask = fcmp one <8 x double> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x double>* + %r = load <8 x double>* %vaddr, align 64 + %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer + ret <8 x double>%res +} + +; CHECK-LABEL: test47 +; CHECK: vmovupd{{.*{%k[1-7]} {z} }} +; CHECK: ret +define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) { + %mask = fcmp one <8 x double> %mask1, zeroinitializer + %vaddr = bitcast i8* %addr to <8 x double>* + %r = load <8 x double>* %vaddr, align 1 + %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer + ret <8 x double>%res +} -- 2.34.1