Re-apply r214881: Fix return sequence on armv4 thumb

[oota-llvm.git] / lib / Target / ARM / ARMScheduleSwift.td
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td

index 1c9058c25025f76df6378b47eb80c371ae7a025d..b03d5ff44c6e77c18928942292f1738835e83873 100644 (file)
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1076,7 +1076,7 @@ def SwiftItineraries : ProcessorItineraries<
  // Swift machine model for scheduling and other instruction cost heuristics.
  def SwiftModel : SchedMachineModel {
    let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
-  let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+  let MicroOpBufferSize = 45; // Based on NEON renamed registers.
    let LoadLatency = 3;
    let MispredictPenalty = 14; // A branch direction mispredict.
  
@@ -1338,27 +1338,32 @@ let SchedModel = SwiftModel in {
    }
    // 4.2.18 Integer Divide
    def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
-  def : InstRW < [WriteDiv],
+  def : InstRW <[SwiftDiv], // Applies to the plain and t2-prefixed SDIV/UDIV opcodes matched below.
          (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
  
    // 4.2.19 Integer Load Single Element
    // 4.2.20 Integer Load Signextended
    def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
      let Latency = 3;
+    let NumMicroOps = 2; // Two micro-ops, matching the two units in the resource list.
    }
    def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
      let Latency = 4;
+    let NumMicroOps = 2; // Same units as the three-cycle P2+P01 write; only the latency differs.
    }
    def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
                                                     SwiftUnitP01]> {
      let Latency = 4;
+    let NumMicroOps = 3; // Three micro-ops: SwiftUnitP01 is listed twice alongside SwiftUnitP2.
    }
    def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
      let Latency = 3;
+    let NumMicroOps = 2; // Two micro-ops: SwiftUnitP2 is listed twice.
    }
    def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
-                                                    SwiftUnitP01]> {
+                                                   SwiftUnitP01]> {
      let Latency = 3;
+    let NumMicroOps = 3; // Three micro-ops: SwiftUnitP2 twice plus SwiftUnitP01.
    }
    def SwiftWrBackOne : SchedWriteRes<[]> {
      let Latency = 1;
@@ -1399,7 +1404,10 @@ let SchedModel = SwiftModel in {
      def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> { // `#` pastes the value of Lat into the def name.
        let Latency = Lat; // Latency comes from the same variable that forms the name.
      }
-    def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; }
+    def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { // Same latency as SwiftWriteLM#Lat#Cy, but with an empty resource list.
+      let Latency = Lat;
+      let NumMicroOps = 0; // No units are listed, and the write is counted as zero micro-ops.
+    }
    }
    // Predicate.
    foreach NumAddr = 1-16 in {
@@ -1520,6 +1528,7 @@ let SchedModel = SwiftModel in {
    // 4.2.25 Integer Store, Multiple
    def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
      let Latency = 0;
+    let NumMicroOps = 2; // Per-instance count; SwiftWriteSTM#NumAddr repeats this write NumAddr times.
    }
    foreach NumAddr = 1-16 in {
       def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
@@ -1712,7 +1721,7 @@ let SchedModel = SwiftModel in {
      SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
                                  SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
                                  SwiftVLDMPerm3]>,
-    // Load of a Q register (not neccessarily true). We should not be mapping to
+    // Load of a Q register (not necessarily true). We should not be mapping to
      // 4 S registers, either.
      SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
                                  SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
@@ -1849,7 +1858,7 @@ let SchedModel = SwiftModel in {
      // Assume 5 D registers.
      SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
      SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
-    // Asume three Q registers.
+    // Assume three Q registers.
      SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
      SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
      // Assume 7 D registers.
@@ -2043,6 +2052,22 @@ let SchedModel = SwiftModel in {
          (instregex "VST4LN(d|q)(8|16|32)_UPD",
                     "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
  
+  // 4.2.44 VFP, Divide and Square Root
+  def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+    let NumMicroOps = 1; // A single micro-op even though two units are listed.
+    let Latency = 17;
+    let ResourceCycles = [1, 15]; // Positional: SwiftUnitP0 for 1 cycle, SwiftUnitDiv for 15.
+  }
+  def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
+    let NumMicroOps = 1; // A single micro-op even though two units are listed.
+    let Latency = 32;
+    let ResourceCycles = [1, 30]; // Positional: SwiftUnitP0 for 1 cycle, SwiftUnitDiv for 30.
+  }
+  def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>; // S-suffixed forms (presumably single-precision; confirm) use the 17-cycle write.
+  def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>; // D-suffixed forms (presumably double-precision; confirm) use the 32-cycle write.
+
+  // Not specified.
+  def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
    // Preload.
    def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
      let ResourceCycles = [0];