Make use of new reserved/required scheduling stuff: introduce VFP and NEON locks...

author Anton Korobeynikov <asl@math.spbu.ru>

Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)

committer Anton Korobeynikov <asl@math.spbu.ru>

Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
author Anton Korobeynikov <asl@math.spbu.ru>
Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
committer Anton Korobeynikov <asl@math.spbu.ru>
Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td

index bbf43dec58acc3f13f68ce9e330fee0a1302e396..bd6791f92ea7c284f139e9c571b49fd9947f324f 100644 (file)
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -23,7 +23,7 @@
  class FuncUnit;
  
  class ReservationKind<bits<1> val> {
-  bits<1> Value = val;
+  int Value = val;
  }
  
  def Required : ReservationKind<0>;
@@ -43,14 +43,19 @@ def Reserved : ReservationKind<1>;
  //   InstrStage<1, [FU_x, FU_y]>     - TimeInc defaults to Cycles
  //   InstrStage<1, [FU_x, FU_y], 0>  - TimeInc explicit
  //
-class InstrStage<int cycles, list<FuncUnit> units,
-                 int timeinc = -1, ReservationKind kind = Required> {
+
+class InstrStage2<int cycles, list<FuncUnit> units,
+                  int timeinc, ReservationKind kind> {
    int Cycles          = cycles;       // length of stage in machine cycles
    list<FuncUnit> Units = units;       // choice of functional units
    int TimeInc         = timeinc;      // cycles till start of next stage
    int Kind            = kind.Value;   // kind of FU reservation
  }
  
+class InstrStage<int cycles, list<FuncUnit> units,
+                 int timeinc = -1>
+  : InstrStage2<cycles, units, timeinc, Required>;
+
  //===----------------------------------------------------------------------===//
  // Instruction itinerary - An itinerary represents a sequential series of steps
  // required to complete an instruction.  Itineraries are represented as lists of
@@ -71,10 +76,10 @@ def NoItinerary : InstrItinClass;
  // Instruction itinerary data - These values provide a runtime map of an 
  // instruction itinerary class (name) to its itinerary data.
  //
-class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
+class InstrItinData<InstrItinClass Class, list<InstrStage2> stages,
                      list<int> operandcycles = []> {
    InstrItinClass TheClass = Class;
-  list<InstrStage> Stages = stages;
+  list<InstrStage2> Stages = stages;
    list<int> OperandCycles = operandcycles;
  }
  
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td

index fc4c5f5830b0009ad2a1b08af759d5fafeacf2db..db15a85e40f1b637d915630c8849818acd93d6cc 100644 (file)
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -17,6 +17,8 @@ def FU_LdSt0   : FuncUnit; // pipeline 0 load/store
  def FU_LdSt1   : FuncUnit; // pipeline 1 load/store
  def FU_NPipe   : FuncUnit; // NEON ALU/MUL pipe
  def FU_NLSPipe : FuncUnit; // NEON LS pipe
+def FU_DRegsVFP: FuncUnit; // FP register set, VFP side
+def FU_DRegsN  : FuncUnit; // FP register set, NEON side
  
  //===----------------------------------------------------------------------===//
  // Instruction Itinerary classes used for ARM
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td

index b121045dd5b02ca2cc3c2acec2972460d79008f6..0d7a5539c1db98580d391293969b8cbc074f416c 100644 (file)
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -593,94 +593,147 @@ def CortexA8Itineraries : ProcessorItineraries<[
  // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
  //
  def CortexA9Itineraries : ProcessorItineraries<[
+  // VFP and NEON share the same register file. This means that every VFP
+  // instruction should wait for full completion of the preceding NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsN.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration.
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired, this precisely models the
+  // "cross-domain" stalls.
  
    // VFP
    // Issue through integer pipeline, and execute in NEON unit.
-  //
  
    // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSTAT , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                              InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>]>,
    //
    // Single-precision FP Unary
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpUNA32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [1, 1]>,
    //
    // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpUNA64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [1, 1]>,
  
    //
    // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCMP32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [1, 1]>,
    //
    // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCMP64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [1, 1]>,
    //
    // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTSD , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1]>,
    //
    // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTDS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1]>,
    //
    // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTSI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1]>,
    //
    // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTDI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1]>,
    //
    // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTIS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1]>,
    //
    // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTID , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1]>,
    //
    // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpALU32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
    //
    // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpALU64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
    //
    // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMUL32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<6, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
    //
    // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMUL64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<7, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
    //
    // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMAC32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<9, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
    //
    // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMAC64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<10, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
    //
    // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpDIV32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<16, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
    //
    // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpDIV64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<26, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
    //
    // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSQRT32, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<18, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<13, [FU_NPipe]>], [17, 1]>,
    //
    // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<33, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                 InstrStage<28, [FU_NPipe]>], [32, 1]>
  ]>;
author	Anton Korobeynikov <asl@math.spbu.ru>
	Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
committer	Anton Korobeynikov <asl@math.spbu.ru>
	Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
include/llvm/Target/TargetSchedule.td		patch \| blob \| history
lib/Target/ARM/ARMSchedule.td		patch \| blob \| history
lib/Target/ARM/ARMScheduleV7.td		patch \| blob \| history