setStackPointerRegisterToSaveRestore(ARM::SP);
- setSchedulingPreference(Sched::RegPressure);
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
// FIXME: If-converter should use instruction latency to determine
// profitability rather than relying on fixed limits.
; CORTEXA8: test:
; CORTEXA8: vabs.f32 d1, d1
; CORTEXA9: test:
-; CORTEXA9: vabs.f32 s1, s1
+; CORTEXA9: vabs.f32 s0, s0
; CORTEXA8: test:
; CORTEXA8: vadd.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vadd.f32 s0, s1, s0
+; CORTEXA9: vadd.f32 s0, s0, s1
; CORTEXA8: test:
; CORTEXA8: vdiv.f32 s0, s1, s0
; CORTEXA9: test:
-; CORTEXA9: vdiv.f32 s0, s1, s0
+; CORTEXA9: vdiv.f32 s0, s0, s1
; CORTEXA8: test:
; CORTEXA8: vmul.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vmla.f32 s2, s1, s0
+; CORTEXA9: vmla.f32 s0, s1, s2
; CORTEXA8: test:
; CORTEXA8: vnmls.f32 s2, s1, s0
; CORTEXA9: test:
-; CORTEXA9: vnmls.f32 s2, s1, s0
+; CORTEXA9: vnmls.f32 s0, s1, s2
; CORTEXA8: test:
; CORTEXA8: vmul.f32 d0, d1, d0
; CORTEXA9: test:
-; CORTEXA9: vmul.f32 s0, s1, s0
+; CORTEXA9: vmul.f32 s0, s0, s1
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
entry:
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
}
define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
entry:
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
%struct.int16x8_t = type { <8 x i16> }
entry:
; CHECK: t2:
; CHECK: vld1.16
-; CHECK-NOT: vmov
-; CHECK: vmul.i16
; CHECK: vld1.16
-; CHECK: vst1.16
; CHECK-NOT: vmov
; CHECK: vmul.i16
+; CHECK: vmul.i16
+; CHECK-NOT: vmov
+; CHECK: vst1.16
; CHECK: vst1.16
%0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
%1 = load <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1]
define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
; CHECK: t9:
; CHECK: vldr.64
-; CHECK-NEXT: vstmia r0, {d0,d1}
-; CHECK-NEXT: vmov.i8 d1
+; CHECK: vmov.i8 d1
+; CHECK-NEXT: vstmia r0, {d2,d3}
; CHECK-NEXT: vstmia r0, {d0,d1}
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]