; CHECK-LABEL: t5:
; CHECK: vld1.32 {[[REG1:d[0-9]+]][1]}, [r0]
; CHECK: vorr [[REG2:d[0-9]+]], [[REG1]], [[REG1]]
-; CHECK-DAG: vld1.32 {[[REG1]][0]}, [r1]
-; CHECK-DAG: vld1.32 {[[REG2]][0]}, [r2]
+; CHECK: vld1.32 {[[REG1]][0]}, [r1]
+; CHECK: vld1.32 {[[REG2]][0]}, [r2]
; CHECK: vmull.u8 q{{[0-9]+}}, [[REG1]], [[REG2]]
define <8 x i16> @t5(i8* nocapture %sp0, i8* nocapture %sp1, i8* nocapture %sp2) {
entry:
define <2 x i8> @test_truncate(<2 x i128> %in) {
; CHECK-LABEL: test_truncate:
; CHECK: mov [[BASE:r[0-9]+]], sp
-; CHECK-DAG: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
-; CHECK-DAG: add [[BASE2:r[0-9]+]], [[BASE]], #4
-; CHECK-DAG: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
+; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
+; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
+; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
; REG2 Should map on the same Q register as REG1, i.e., REG2 = REG1 - 1, but we
; cannot express that.
-; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0], r0
-; CHECK-DAG: vmov.32 [[REG2]][1], r1
+; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0
+; CHECK-NEXT: vmov.32 [[REG2]][1], r1
; The Q register used here should match floor(REG1/2), but we cannot express that.
-; CHECK: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}
+; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}
; CHECK-NEXT: vmov r0, r1, [[RES]]
entry:
%res = trunc <2 x i128> %in to <2 x i8>
define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst1lanei8:
;Check the (default) alignment.
-;CHECK: vst1.8 {{{d[0-9]+}}[3]}, [r0]
+;CHECK: vst1.8 {d16[3]}, [r0]
%tmp1 = load <8 x i8>* %B
%tmp2 = extractelement <8 x i8> %tmp1, i32 3
store i8 %tmp2, i8* %A, align 8
;Check for a post-increment updating store.
define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst1lanei8_update:
-;CHECK: vst1.8 {d16[3]}, [{{r[0-9]+}}]!
+;CHECK: vst1.8 {d16[3]}, [{{r[0-9]}}]!
%A = load i8** %ptr
%tmp1 = load <8 x i8>* %B
%tmp2 = extractelement <8 x i8> %tmp1, i32 3
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vst1.16 {{{d[0-9]+}}[2]}, [r0:16]
+;CHECK: vst1.16 {d16[2]}, [r0:16]
%tmp1 = load <4 x i16>* %B
%tmp2 = extractelement <4 x i16> %tmp1, i32 2
store i16 %tmp2, i16* %A, align 8
define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vst1.32 {{{d[0-9]+}}[1]}, [r0:32]
+;CHECK: vst1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x i32>* %B
%tmp2 = extractelement <2 x i32> %tmp1, i32 1
store i32 %tmp2, i32* %A, align 8
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1lanef:
-;CHECK: vst1.32 {{{d[0-9]+}}[1]}, [r0:32]
+;CHECK: vst1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x float>* %B
%tmp2 = extractelement <2 x float> %tmp1, i32 1
store float %tmp2, float* %A
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst1laneQi8:
; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.8 {{{d[0-9]+}}[1]}, [r0]
+; // CHE-CK: vst1.8 {d17[1]}, [r0]
%tmp1 = load <16 x i8>* %B
%tmp2 = extractelement <16 x i8> %tmp1, i32 9
store i8 %tmp2, i8* %A, align 8
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vst1laneQi16:
-;CHECK: vst1.16 {{{d[0-9]+}}[1]}, [r0:16]
+;CHECK: vst1.16 {d17[1]}, [r0:16]
%tmp1 = load <8 x i16>* %B
%tmp2 = extractelement <8 x i16> %tmp1, i32 5
store i16 %tmp2, i16* %A, align 8
define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst1laneQi32:
; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {{{d[0-9]+}}[1]}, [r0:32]
+; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
%tmp1 = load <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
store i32 %tmp2, i32* %A, align 8
define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst1laneQi32_update:
; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {{{d[0-9]+}}[1]}, [{{r[0-9]+}}:32]!
+; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
%A = load i32** %ptr
%tmp1 = load <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst1laneQf:
; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {{{d[0-9]+}}[1]}, [r0]
+; // CHE-CK: vst1.32 {d17[1]}, [r0]
%tmp1 = load <4 x float>* %B
%tmp2 = extractelement <4 x float> %tmp1, i32 3
store float %tmp2, float* %A
;Check for a post-increment updating store with register increment.
define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2lanei16_update:
-;CHECK: vst2.16 {d16[1], d17[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
+;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
;Check for a post-increment updating store.
define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst3laneQi32_update:
-;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [{{r[0-9]+}}]!
+;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
%A = load i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
;Check for a post-increment updating store.
define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4lanei8_update:
-;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
%A = load i8** %ptr
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)