Change ARM scheduling default to list-hybrid if the target supports floating point instructions (and is not using soft float).

author Evan Cheng <evan.cheng@apple.com>

Fri, 21 May 2010 00:43:17 +0000 (00:43 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 21 May 2010 00:43:17 +0000 (00:43 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 21 May 2010 00:43:17 +0000 (00:43 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 21 May 2010 00:43:17 +0000 (00:43 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 340e8870443ff8da9fa2bfece7ecff6f36a86428..c5a6a8e1191586fd0b5c58fa1f48fdb6135a4315 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -467,7 +467,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  
    setStackPointerRegisterToSaveRestore(ARM::SP);
  
-  setSchedulingPreference(Sched::RegPressure);
+  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+    setSchedulingPreference(Sched::RegPressure);
+  else
+    setSchedulingPreference(Sched::Hybrid);
  
    // FIXME: If-converter should use instruction latency to determine
    // profitability rather than relying on fixed limits.
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll

index f03282bdab7f4c4020dd55194f6c815c9a27309b..dfc1e0a957c3de668fb8a6cd7f260d8476ab1718 100644 (file)
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -24,4 +24,4 @@ declare float @fabsf(float)
  ; CORTEXA8: test:
  ; CORTEXA8:    vabs.f32        d1, d1
  ; CORTEXA9: test:
-; CORTEXA9:    vabs.f32        s1, s1
+; CORTEXA9:    vabs.f32        s0, s0
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll

index 749690e98d0f4fb638a2b9b2cabb9780b8a3df59..113f0e29bd15800e00b1726c788d18d7bd87cf33 100644 (file)
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -20,4 +20,4 @@ entry:
  ; CORTEXA8: test:
  ; CORTEXA8:    vadd.f32        d0, d1, d0
  ; CORTEXA9: test:
-; CORTEXA9:    vadd.f32        s0, s1, s0
+; CORTEXA9:    vadd.f32        s0, s0, s1
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll

index 0c31495792979b2a62c008fe10a35b4eaa288cad..9af1217de1d080862b9bd2221bd46519f6e9ca73 100644 (file)
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -20,4 +20,4 @@ entry:
  ; CORTEXA8: test:
  ; CORTEXA8:    vdiv.f32        s0, s1, s0
  ; CORTEXA9: test:
-; CORTEXA9:    vdiv.f32        s0, s1, s0
+; CORTEXA9:    vdiv.f32        s0, s0, s1
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll

index f8b47b5bac0deea64047fc2eeb05561f6fa82c3a..c4ceca9828b06d50c0462fe5dcfda60d8a48c6de 100644 (file)
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -21,4 +21,4 @@ entry:
  ; CORTEXA8: test:
  ; CORTEXA8:    vmul.f32        d0, d1, d0
  ; CORTEXA9: test:
-; CORTEXA9:    vmla.f32        s2, s1, s0
+; CORTEXA9:    vmla.f32        s0, s1, s2
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll

index 7a70543dee6cf4b96891d5a1a09b4170fdb92627..103ce334519bb8d7d34d509dc7c3ac3a5317fce2 100644 (file)
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -21,4 +21,4 @@ entry:
  ; CORTEXA8: test:
  ; CORTEXA8:    vnmls.f32       s2, s1, s0
  ; CORTEXA9: test:
-; CORTEXA9:    vnmls.f32       s2, s1, s0
+; CORTEXA9:    vnmls.f32       s0, s1, s2
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll

index ef4e3e52818e84cc779dfcdba10d377ff2c46d9c..bfafd20c8602c51e5dbc6adf60a1b1e2fa1b5c80 100644 (file)
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -20,4 +20,4 @@ entry:
  ; CORTEXA8: test:
  ; CORTEXA8:    vmul.f32        d0, d1, d0
  ; CORTEXA9: test:
-; CORTEXA9:    vmul.f32        s0, s1, s0
+; CORTEXA9:    vmul.f32        s0, s0, s1
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll

index 6b7cefa6414d2df788af2ff5eec56825c5c1a1f7..0b47edd5f1f121546898d2ffb0dfa32e2a15d289 100644 (file)
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -4,7 +4,7 @@
  ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
  
  define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
  entry:
         %0 = fmul float %a, %b
         %1 = fsub float -0.0, %0
@@ -13,7 +13,7 @@ entry:
  }
  
  define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s2, s1, s0
+; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
  entry:
         %0 = fmul float %a, %b
         %1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll

index 1a27d4d93979adb5948aa8a922d55b7a99e24e2b..3ba82ccdfa96e3056a7c6b4b36625ebd1aa77fa2 100644 (file)
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
  ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
  
  %struct.int16x8_t = type { <8 x i16> }
@@ -45,12 +45,12 @@ define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocaptur
  entry:
  ; CHECK:        t2:
  ; CHECK:        vld1.16
-; CHECK-NOT:    vmov
-; CHECK:        vmul.i16
  ; CHECK:        vld1.16
-; CHECK:        vst1.16
  ; CHECK-NOT:    vmov
  ; CHECK:        vmul.i16
+; CHECK:        vmul.i16
+; CHECK-NOT:    vmov
+; CHECK:        vst1.16
  ; CHECK:        vst1.16
    %0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
    %1 = load <8 x i16>* %0, align 16               ; <<8 x i16>> [#uses=1]
@@ -238,8 +238,8 @@ bb14:                                             ; preds = %bb6
  define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
  ; CHECK:        t9:
  ; CHECK:        vldr.64
-; CHECK-NEXT:   vstmia r0, {d0,d1}
-; CHECK-NEXT:   vmov.i8 d1
+; CHECK:        vmov.i8 d1
+; CHECK-NEXT:   vstmia r0, {d2,d3}
  ; CHECK-NEXT:   vstmia r0, {d0,d1}
    %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
    %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 21 May 2010 00:43:17 +0000 (00:43 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 21 May 2010 00:43:17 +0000 (00:43 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/fabss.ll		patch \| blob \| history
test/CodeGen/ARM/fadds.ll		patch \| blob \| history
test/CodeGen/ARM/fdivs.ll		patch \| blob \| history
test/CodeGen/ARM/fmacs.ll		patch \| blob \| history
test/CodeGen/ARM/fmscs.ll		patch \| blob \| history
test/CodeGen/ARM/fmuls.ll		patch \| blob \| history
test/CodeGen/ARM/fnmscs.ll		patch \| blob \| history
test/CodeGen/ARM/reg_sequence.ll		patch \| blob \| history