Make use of new reserved/required scheduling stuff: introduce VFP and NEON locks...
authorAnton Korobeynikov <asl@math.spbu.ru>
Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
committerAnton Korobeynikov <asl@math.spbu.ru>
Wed, 7 Apr 2010 18:19:40 +0000 (18:19 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100646 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Target/TargetSchedule.td
lib/Target/ARM/ARMSchedule.td
lib/Target/ARM/ARMScheduleV7.td

index bbf43dec58acc3f13f68ce9e330fee0a1302e396..bd6791f92ea7c284f139e9c571b49fd9947f324f 100644 (file)
@@ -23,7 +23,7 @@
 class FuncUnit;
 
 class ReservationKind<bits<1> val> {
-  bits<1> Value = val;
+  int Value = val;
 }
 
 def Required : ReservationKind<0>;
@@ -43,14 +43,19 @@ def Reserved : ReservationKind<1>;
 //   InstrStage<1, [FU_x, FU_y]>     - TimeInc defaults to Cycles
 //   InstrStage<1, [FU_x, FU_y], 0>  - TimeInc explicit
 //
-class InstrStage<int cycles, list<FuncUnit> units,
-                 int timeinc = -1, ReservationKind kind = Required> {
+
+class InstrStage2<int cycles, list<FuncUnit> units,
+                  int timeinc, ReservationKind kind> {
   int Cycles          = cycles;       // length of stage in machine cycles
   list<FuncUnit> Units = units;       // choice of functional units
   int TimeInc         = timeinc;      // cycles till start of next stage
   int Kind            = kind.Value;   // kind of FU reservation
 }
 
+class InstrStage<int cycles, list<FuncUnit> units,
+                 int timeinc = -1>
+  : InstrStage2<cycles, units, timeinc, Required>;
+
 //===----------------------------------------------------------------------===//
 // Instruction itinerary - An itinerary represents a sequential series of steps
 // required to complete an instruction.  Itineraries are represented as lists of
@@ -71,10 +76,10 @@ def NoItinerary : InstrItinClass;
 // Instruction itinerary data - These values provide a runtime map of an 
 // instruction itinerary class (name) to its itinerary data.
 //
-class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
+class InstrItinData<InstrItinClass Class, list<InstrStage2> stages,
                     list<int> operandcycles = []> {
   InstrItinClass TheClass = Class;
-  list<InstrStage> Stages = stages;
+  list<InstrStage2> Stages = stages;
   list<int> OperandCycles = operandcycles;
 }
 
index fc4c5f5830b0009ad2a1b08af759d5fafeacf2db..db15a85e40f1b637d915630c8849818acd93d6cc 100644 (file)
@@ -17,6 +17,8 @@ def FU_LdSt0   : FuncUnit; // pipeline 0 load/store
 def FU_LdSt1   : FuncUnit; // pipeline 1 load/store
 def FU_NPipe   : FuncUnit; // NEON ALU/MUL pipe
 def FU_NLSPipe : FuncUnit; // NEON LS pipe
+def FU_DRegsVFP: FuncUnit; // FP register set, VFP side
+def FU_DRegsN  : FuncUnit; // FP register set, NEON side
 
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for ARM
index b121045dd5b02ca2cc3c2acec2972460d79008f6..0d7a5539c1db98580d391293969b8cbc074f416c 100644 (file)
@@ -593,94 +593,147 @@ def CortexA8Itineraries : ProcessorItineraries<[
 // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
 //
 def CortexA9Itineraries : ProcessorItineraries<[
+  // VFP and NEON shares the same register file. This means that every VFP
+  // instruction should wait for full completion of the consecutive NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsVFP.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration.
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired this models precisly "cross-domain"
+  // stalls.
 
   // VFP
   // Issue through integer pipeline, and execute in NEON unit.
-  //
 
   // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSTAT , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                              InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                               InstrStage<1, [FU_NPipe]>]>,
   //
   // Single-precision FP Unary
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpUNA32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpUNA64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
 
   //
   // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCMP32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCMP64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTSD , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTDS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTSI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTDI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTIS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTID , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpALU32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpALU64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMUL32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<6, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMUL64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<7, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMAC32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<9, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMAC64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<10, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpDIV32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<16, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpDIV64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<26, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSQRT32, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<18, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<13, [FU_NPipe]>], [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<33, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<28, [FU_NPipe]>], [32, 1]>
 ]>;