From: Tim Northover Date: Fri, 2 May 2014 14:54:15 +0000 (+0000) Subject: AArch64/ARM64: support indexed loads/stores on vector types. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=6f86e23c1a8375cd47b586efc001330720d24f79;p=oota-llvm.git AArch64/ARM64: support indexed loads/stores on vector types. While post-indexed LD1/ST1 instructions do exist for vector loads, this patch makes use of the more flexible addressing-modes in LDR/STR instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207838 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp index bc6fef1aa43..5531101fe2f 100644 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ b/lib/Target/ARM64/ARM64AsmPrinter.cpp @@ -427,6 +427,7 @@ static unsigned getRealIndexedOpcode(unsigned Opc) { switch (Opc) { case ARM64::LDRXpre_isel: return ARM64::LDRXpre; case ARM64::LDRWpre_isel: return ARM64::LDRWpre; + case ARM64::LDRQpre_isel: return ARM64::LDRQpre; case ARM64::LDRDpre_isel: return ARM64::LDRDpre; case ARM64::LDRSpre_isel: return ARM64::LDRSpre; case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre; @@ -437,6 +438,7 @@ static unsigned getRealIndexedOpcode(unsigned Opc) { case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre; case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre; + case ARM64::LDRQpost_isel: return ARM64::LDRQpost; case ARM64::LDRDpost_isel: return ARM64::LDRDpost; case ARM64::LDRSpost_isel: return ARM64::LDRSpost; case ARM64::LDRXpost_isel: return ARM64::LDRXpost; @@ -453,6 +455,7 @@ static unsigned getRealIndexedOpcode(unsigned Opc) { case ARM64::STRWpre_isel: return ARM64::STRWpre; case ARM64::STRHHpre_isel: return ARM64::STRHHpre; case ARM64::STRBBpre_isel: return ARM64::STRBBpre; + case ARM64::STRQpre_isel: return ARM64::STRQpre; case ARM64::STRDpre_isel: return ARM64::STRDpre; case ARM64::STRSpre_isel: return ARM64::STRSpre; } @@ -494,6 +497,7 @@ void ARM64AsmPrinter::EmitInstruction(const 
MachineInstr *MI) { case ARM64::LDRBBpre_isel: case ARM64::LDRXpre_isel: case ARM64::LDRWpre_isel: + case ARM64::LDRQpre_isel: case ARM64::LDRDpre_isel: case ARM64::LDRSpre_isel: case ARM64::LDRSBWpre_isel: @@ -501,6 +505,7 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM64::LDRSHWpre_isel: case ARM64::LDRSHXpre_isel: case ARM64::LDRSWpre_isel: + case ARM64::LDRQpost_isel: case ARM64::LDRDpost_isel: case ARM64::LDRSpost_isel: case ARM64::LDRXpost_isel: @@ -525,6 +530,7 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM64::STRWpre_isel: case ARM64::STRHHpre_isel: case ARM64::STRBBpre_isel: + case ARM64::STRQpre_isel: case ARM64::STRDpre_isel: case ARM64::STRSpre_isel: { MCInst TmpInst; diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 07e47e0f54c..d9c945ccd92 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -907,8 +907,10 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } } else if (VT == MVT::f32) { Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel; - } else if (VT == MVT::f64) { + } else if (VT == MVT::f64 || VT.is64BitVector()) { Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel; + } else if (VT.is128BitVector()) { + Opcode = IsPre ? 
ARM64::LDRQpre_isel : ARM64::LDRQpost_isel; } else return nullptr; SDValue Chain = LD->getChain(); diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 6dd588c3705..f95308b5c47 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -521,6 +521,14 @@ void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + + if (Subtarget->isLittleEndian()) { + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT.getSimpleVT(), Legal); + setIndexedStoreAction(im, VT.getSimpleVT(), Legal); + } + } } void ARM64TargetLowering::addDRTypeForNEON(MVT VT) { diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 455cdf320c0..d2f8452a9fb 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -1503,6 +1503,7 @@ def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; // ISel pseudos and patterns. See expanded comment on LoadPreIdxPseudo. +def LDRQpre_isel : LoadPreIdxPseudo<FPR128>; def LDRDpre_isel : LoadPreIdxPseudo<FPR64>; def LDRSpre_isel : LoadPreIdxPseudo<FPR32>; def LDRXpre_isel : LoadPreIdxPseudo<GPR64>; @@ -1542,6 +1543,7 @@ def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">; // ISel pseudos and patterns. See expanded comment on LoadPostIdxPseudo. +def LDRQpost_isel : LoadPostIdxPseudo<FPR128>; def LDRDpost_isel : LoadPostIdxPseudo<FPR64>; def LDRSpost_isel : LoadPostIdxPseudo<FPR32>; def LDRXpost_isel : LoadPostIdxPseudo<GPR64>; @@ -1812,6 +1814,7 @@ def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb">; def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh">; // ISel pseudos and patterns. See expanded comment on StorePreIdxPseudo. 
+defm STRQpre : StorePreIdxPseudo; defm STRDpre : StorePreIdxPseudo; defm STRSpre : StorePreIdxPseudo; defm STRXpre : StorePreIdxPseudo; @@ -1829,6 +1832,32 @@ def : Pat<(pre_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i8 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i16 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i32 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f32 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v1i64 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v1f64 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpre_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; + +def : Pat<(pre_store (v16i8 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i16 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i32 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v4f32 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i64 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f64 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpre_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; + //--- // (immediate post-indexed) def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str">; @@ -1843,6 
+1872,7 @@ def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb">; def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh">; // ISel pseudos and patterns. See expanded comment on StorePostIdxPseudo. +defm STRQpost : StorePostIdxPseudo; defm STRDpost : StorePostIdxPseudo; defm STRSpost : StorePostIdxPseudo; defm STRXpost : StorePostIdxPseudo; @@ -1860,6 +1890,31 @@ def : Pat<(post_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v8i8 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v4i16 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v2i32 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v2f32 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v1i64 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v1f64 FPR64:$Rt), am_noindex:$addr, simm9:$off), + (STRDpost_isel FPR64:$Rt, am_noindex:$addr, simm9:$off)>; + +def : Pat<(post_store (v16i8 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v8i16 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v4i32 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v4f32 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; +def : Pat<(post_store (v2i64 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; 
+def : Pat<(post_store (v2f64 FPR128:$Rt), am_noindex:$addr, simm9:$off), + (STRQpost_isel FPR128:$Rt, am_noindex:$addr, simm9:$off)>; //===----------------------------------------------------------------------===// // Load/store exclusive instructions. diff --git a/test/CodeGen/ARM64/indexed-vector-ldst.ll b/test/CodeGen/ARM64/indexed-vector-ldst.ll new file mode 100644 index 00000000000..1f510b07a16 --- /dev/null +++ b/test/CodeGen/ARM64/indexed-vector-ldst.ll @@ -0,0 +1,402 @@ +; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s + +@ptr = global i8* null + +define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) { +; CHECK-LABEL: test_v8i8_pre_load: +; CHECK: ldr d0, [x0, #40]! + %newaddr = getelementptr <8 x i8>* %addr, i32 5 + %val = load <8 x i8>* %newaddr, align 8 + store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) + ret <8 x i8> %val +} + +define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) { +; CHECK-LABEL: test_v8i8_post_load: +; CHECK: ldr d0, [x0], #40 + %newaddr = getelementptr <8 x i8>* %addr, i32 5 + %val = load <8 x i8>* %addr, align 8 + store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) + ret <8 x i8> %val +} + +define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) { +; CHECK-LABEL: test_v8i8_pre_store: +; CHECK: str d0, [x0, #40]! + %newaddr = getelementptr <8 x i8>* %addr, i32 5 + store <8 x i8> %in, <8 x i8>* %newaddr, align 8 + store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) + ret void +} + +define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) { +; CHECK-LABEL: test_v8i8_post_store: +; CHECK: str d0, [x0], #40 + %newaddr = getelementptr <8 x i8>* %addr, i32 5 + store <8 x i8> %in, <8 x i8>* %addr, align 8 + store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**) + ret void +} + +define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) { +; CHECK-LABEL: test_v4i16_pre_load: +; CHECK: ldr d0, [x0, #40]! 
+ %newaddr = getelementptr <4 x i16>* %addr, i32 5 + %val = load <4 x i16>* %newaddr, align 8 + store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) + ret <4 x i16> %val +} + +define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) { +; CHECK-LABEL: test_v4i16_post_load: +; CHECK: ldr d0, [x0], #40 + %newaddr = getelementptr <4 x i16>* %addr, i32 5 + %val = load <4 x i16>* %addr, align 8 + store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) + ret <4 x i16> %val +} + +define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) { +; CHECK-LABEL: test_v4i16_pre_store: +; CHECK: str d0, [x0, #40]! + %newaddr = getelementptr <4 x i16>* %addr, i32 5 + store <4 x i16> %in, <4 x i16>* %newaddr, align 8 + store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) + ret void +} + +define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) { +; CHECK-LABEL: test_v4i16_post_store: +; CHECK: str d0, [x0], #40 + %newaddr = getelementptr <4 x i16>* %addr, i32 5 + store <4 x i16> %in, <4 x i16>* %addr, align 8 + store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**) + ret void +} + +define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) { +; CHECK-LABEL: test_v2i32_pre_load: +; CHECK: ldr d0, [x0, #40]! + %newaddr = getelementptr <2 x i32>* %addr, i32 5 + %val = load <2 x i32>* %newaddr, align 8 + store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) + ret <2 x i32> %val +} + +define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) { +; CHECK-LABEL: test_v2i32_post_load: +; CHECK: ldr d0, [x0], #40 + %newaddr = getelementptr <2 x i32>* %addr, i32 5 + %val = load <2 x i32>* %addr, align 8 + store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) + ret <2 x i32> %val +} + +define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) { +; CHECK-LABEL: test_v2i32_pre_store: +; CHECK: str d0, [x0, #40]! 
+ %newaddr = getelementptr <2 x i32>* %addr, i32 5 + store <2 x i32> %in, <2 x i32>* %newaddr, align 8 + store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) + ret void +} + +define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) { +; CHECK-LABEL: test_v2i32_post_store: +; CHECK: str d0, [x0], #40 + %newaddr = getelementptr <2 x i32>* %addr, i32 5 + store <2 x i32> %in, <2 x i32>* %addr, align 8 + store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**) + ret void +} + +define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) { +; CHECK-LABEL: test_v2f32_pre_load: +; CHECK: ldr d0, [x0, #40]! + %newaddr = getelementptr <2 x float>* %addr, i32 5 + %val = load <2 x float>* %newaddr, align 8 + store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) + ret <2 x float> %val +} + +define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) { +; CHECK-LABEL: test_v2f32_post_load: +; CHECK: ldr d0, [x0], #40 + %newaddr = getelementptr <2 x float>* %addr, i32 5 + %val = load <2 x float>* %addr, align 8 + store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) + ret <2 x float> %val +} + +define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) { +; CHECK-LABEL: test_v2f32_pre_store: +; CHECK: str d0, [x0, #40]! 
+ %newaddr = getelementptr <2 x float>* %addr, i32 5 + store <2 x float> %in, <2 x float>* %newaddr, align 8 + store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) + ret void +} + +define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) { +; CHECK-LABEL: test_v2f32_post_store: +; CHECK: str d0, [x0], #40 + %newaddr = getelementptr <2 x float>* %addr, i32 5 + store <2 x float> %in, <2 x float>* %addr, align 8 + store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**) + ret void +} + +define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) { +; CHECK-LABEL: test_v1i64_pre_load: +; CHECK: ldr d0, [x0, #40]! + %newaddr = getelementptr <1 x i64>* %addr, i32 5 + %val = load <1 x i64>* %newaddr, align 8 + store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) + ret <1 x i64> %val +} + +define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) { +; CHECK-LABEL: test_v1i64_post_load: +; CHECK: ldr d0, [x0], #40 + %newaddr = getelementptr <1 x i64>* %addr, i32 5 + %val = load <1 x i64>* %addr, align 8 + store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) + ret <1 x i64> %val +} + +define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) { +; CHECK-LABEL: test_v1i64_pre_store: +; CHECK: str d0, [x0, #40]! + %newaddr = getelementptr <1 x i64>* %addr, i32 5 + store <1 x i64> %in, <1 x i64>* %newaddr, align 8 + store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) + ret void +} + +define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) { +; CHECK-LABEL: test_v1i64_post_store: +; CHECK: str d0, [x0], #40 + %newaddr = getelementptr <1 x i64>* %addr, i32 5 + store <1 x i64> %in, <1 x i64>* %addr, align 8 + store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**) + ret void +} + +define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) { +; CHECK-LABEL: test_v16i8_pre_load: +; CHECK: ldr q0, [x0, #80]! 
+ %newaddr = getelementptr <16 x i8>* %addr, i32 5 + %val = load <16 x i8>* %newaddr, align 8 + store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) + ret <16 x i8> %val +} + +define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) { +; CHECK-LABEL: test_v16i8_post_load: +; CHECK: ldr q0, [x0], #80 + %newaddr = getelementptr <16 x i8>* %addr, i32 5 + %val = load <16 x i8>* %addr, align 8 + store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) + ret <16 x i8> %val +} + +define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) { +; CHECK-LABEL: test_v16i8_pre_store: +; CHECK: str q0, [x0, #80]! + %newaddr = getelementptr <16 x i8>* %addr, i32 5 + store <16 x i8> %in, <16 x i8>* %newaddr, align 8 + store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) + ret void +} + +define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) { +; CHECK-LABEL: test_v16i8_post_store: +; CHECK: str q0, [x0], #80 + %newaddr = getelementptr <16 x i8>* %addr, i32 5 + store <16 x i8> %in, <16 x i8>* %addr, align 8 + store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**) + ret void +} + +define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) { +; CHECK-LABEL: test_v8i16_pre_load: +; CHECK: ldr q0, [x0, #80]! + %newaddr = getelementptr <8 x i16>* %addr, i32 5 + %val = load <8 x i16>* %newaddr, align 8 + store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) + ret <8 x i16> %val +} + +define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) { +; CHECK-LABEL: test_v8i16_post_load: +; CHECK: ldr q0, [x0], #80 + %newaddr = getelementptr <8 x i16>* %addr, i32 5 + %val = load <8 x i16>* %addr, align 8 + store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) + ret <8 x i16> %val +} + +define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) { +; CHECK-LABEL: test_v8i16_pre_store: +; CHECK: str q0, [x0, #80]! 
+ %newaddr = getelementptr <8 x i16>* %addr, i32 5 + store <8 x i16> %in, <8 x i16>* %newaddr, align 8 + store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) + ret void +} + +define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) { +; CHECK-LABEL: test_v8i16_post_store: +; CHECK: str q0, [x0], #80 + %newaddr = getelementptr <8 x i16>* %addr, i32 5 + store <8 x i16> %in, <8 x i16>* %addr, align 8 + store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**) + ret void +} + +define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) { +; CHECK-LABEL: test_v4i32_pre_load: +; CHECK: ldr q0, [x0, #80]! + %newaddr = getelementptr <4 x i32>* %addr, i32 5 + %val = load <4 x i32>* %newaddr, align 8 + store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) + ret <4 x i32> %val +} + +define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) { +; CHECK-LABEL: test_v4i32_post_load: +; CHECK: ldr q0, [x0], #80 + %newaddr = getelementptr <4 x i32>* %addr, i32 5 + %val = load <4 x i32>* %addr, align 8 + store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) + ret <4 x i32> %val +} + +define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) { +; CHECK-LABEL: test_v4i32_pre_store: +; CHECK: str q0, [x0, #80]! + %newaddr = getelementptr <4 x i32>* %addr, i32 5 + store <4 x i32> %in, <4 x i32>* %newaddr, align 8 + store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) + ret void +} + +define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) { +; CHECK-LABEL: test_v4i32_post_store: +; CHECK: str q0, [x0], #80 + %newaddr = getelementptr <4 x i32>* %addr, i32 5 + store <4 x i32> %in, <4 x i32>* %addr, align 8 + store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**) + ret void +} + + +define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) { +; CHECK-LABEL: test_v4f32_pre_load: +; CHECK: ldr q0, [x0, #80]! 
+ %newaddr = getelementptr <4 x float>* %addr, i32 5 + %val = load <4 x float>* %newaddr, align 8 + store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) + ret <4 x float> %val +} + +define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) { +; CHECK-LABEL: test_v4f32_post_load: +; CHECK: ldr q0, [x0], #80 + %newaddr = getelementptr <4 x float>* %addr, i32 5 + %val = load <4 x float>* %addr, align 8 + store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) + ret <4 x float> %val +} + +define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) { +; CHECK-LABEL: test_v4f32_pre_store: +; CHECK: str q0, [x0, #80]! + %newaddr = getelementptr <4 x float>* %addr, i32 5 + store <4 x float> %in, <4 x float>* %newaddr, align 8 + store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) + ret void +} + +define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) { +; CHECK-LABEL: test_v4f32_post_store: +; CHECK: str q0, [x0], #80 + %newaddr = getelementptr <4 x float>* %addr, i32 5 + store <4 x float> %in, <4 x float>* %addr, align 8 + store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**) + ret void +} + + +define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) { +; CHECK-LABEL: test_v2i64_pre_load: +; CHECK: ldr q0, [x0, #80]! 
+ %newaddr = getelementptr <2 x i64>* %addr, i32 5 + %val = load <2 x i64>* %newaddr, align 8 + store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) + ret <2 x i64> %val +} + +define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) { +; CHECK-LABEL: test_v2i64_post_load: +; CHECK: ldr q0, [x0], #80 + %newaddr = getelementptr <2 x i64>* %addr, i32 5 + %val = load <2 x i64>* %addr, align 8 + store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) + ret <2 x i64> %val +} + +define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) { +; CHECK-LABEL: test_v2i64_pre_store: +; CHECK: str q0, [x0, #80]! + %newaddr = getelementptr <2 x i64>* %addr, i32 5 + store <2 x i64> %in, <2 x i64>* %newaddr, align 8 + store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) + ret void +} + +define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) { +; CHECK-LABEL: test_v2i64_post_store: +; CHECK: str q0, [x0], #80 + %newaddr = getelementptr <2 x i64>* %addr, i32 5 + store <2 x i64> %in, <2 x i64>* %addr, align 8 + store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**) + ret void +} + + +define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) { +; CHECK-LABEL: test_v2f64_pre_load: +; CHECK: ldr q0, [x0, #80]! 
+ %newaddr = getelementptr <2 x double>* %addr, i32 5 + %val = load <2 x double>* %newaddr, align 8 + store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) + ret <2 x double> %val +} + +define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) { +; CHECK-LABEL: test_v2f64_post_load: +; CHECK: ldr q0, [x0], #80 + %newaddr = getelementptr <2 x double>* %addr, i32 5 + %val = load <2 x double>* %addr, align 8 + store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) + ret <2 x double> %val +} + +define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) { +; CHECK-LABEL: test_v2f64_pre_store: +; CHECK: str q0, [x0, #80]! + %newaddr = getelementptr <2 x double>* %addr, i32 5 + store <2 x double> %in, <2 x double>* %newaddr, align 8 + store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) + ret void +} + +define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) { +; CHECK-LABEL: test_v2f64_post_store: +; CHECK: str q0, [x0], #80 + %newaddr = getelementptr <2 x double>* %addr, i32 5 + store <2 x double> %in, <2 x double>* %addr, align 8 + store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**) + ret void +}