-; RUN: llc -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane0to0:
+; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
%vec = insertelement <2 x float> %in, float %arg, i32 0
%res = fadd <2 x float> %vec, %vec
}
define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane0to1:
+; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
%vec = insertelement <2 x float> %in, float %arg, i32 1
%res = fadd <2 x float> %vec, %vec
}
define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane1to0:
+; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
%vec = insertelement <2 x float> %in, float %arg, i32 0
%res = fadd <2 x float> %vec, %vec
}
define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane1to1:
+; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
%vec = insertelement <2 x float> %in, float %arg, i32 1
%res = fadd <2 x float> %vec, %vec
define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
-; CHECK: test_vmovs_via_vdup:
+; CHECK-LABEL: test_vmovs_via_vdup:
; Do an operation (which will end up NEON because of +neonfp) to convince the
; execution-domain pass that NEON is a good thing to use.
ret float %res
}
+declare float @llvm.sqrt.f32(float)
+
+declare void @bar()
+
+; This is a comp
+define float @test_ineligible(float, float %in) {
+; CHECK-LABEL: test_ineligible:
+
+ %sqrt = call float @llvm.sqrt.f32(float %in)
+ %val = fadd float %sqrt, %sqrt
+
+ ; This call forces a move from a callee-saved register to the return-reg. That
+ ; move is not eligible for conversion to a d-register instructions because the
+ ; use-def chains would be messed up. Primarily a compile-test (we used to
+ ; internal fault).
+ call void @bar()
+; CHECK: bl bar
+; CHECK: vext.32
+; CHECK: vext.32
+ ret float %val
+}
+
+define i32 @test_vmovs_no_sreg(i32 %in) {
+; CHECK-LABEL: test_vmovs_no_sreg:
+
+ ; Check that the movement to and from GPRs takes place in the NEON domain.
+; CHECK: vmov.32 d
+ %x = bitcast i32 %in to float
+
+ %res = fadd float %x, %x
+
+; CHECK: vmov.32 r{{[0-9]+}}, d
+ %resi = bitcast float %res to i32
+
+ ret i32 %resi
+}
+
+
+; The point of this test is:
+; + Make sure s1 is live before the BL
+; + Make sure s1 is clobbered by the BL
+; + Convince LLVM to emit a VMOV to S0
+; + Convince LLVM to domain-convert this.
+
+; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
+; because it's dead.
+
+declare float @clobbers_s1(float, float)
+
+define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
+ %elt = call float @clobbers_s1(float %val, float %val)
+
+ %vec = insertelement <2 x float> %invec, float %elt, i32 0
+ %res = fadd <2 x float> %vec, %vec
+ ret <2 x float> %res
+}