ARM: Enable MachineScheduler and disable PostRAScheduler for swift.

author Matthias Braun <matze@braunis.de>

Fri, 17 Jul 2015 01:44:31 +0000 (01:44 +0000)

committer Matthias Braun <matze@braunis.de>

Fri, 17 Jul 2015 01:44:31 +0000 (01:44 +0000)
author Matthias Braun <matze@braunis.de>
Fri, 17 Jul 2015 01:44:31 +0000 (01:44 +0000)
committer Matthias Braun <matze@braunis.de>
Fri, 17 Jul 2015 01:44:31 +0000 (01:44 +0000)
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h

index c09791631056d24f8325028c6ab07b56f4eb8d6b..a8b20570103eaea22bd2c20cd79e296446c5d3b9 100644 (file)
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -206,6 +206,9 @@ struct MCSchedModel {
    /// scheduling class (itinerary class or SchedRW list).
    bool isComplete() const { return CompleteModel; }
  
+  /// Return true if the machine supports out-of-order execution.
+  bool isOutOfOrder() const { return MicroOpBufferSize > 1; }
+
    unsigned getNumProcResourceKinds() const {
      return NumProcResourceKinds;
    }
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td

index b03d5ff44c6e77c18928942292f1738835e83873..8f9cf368ced0b41cc1aac4a37407d2b42bae06c2 100644 (file)
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -37,1050 +37,12 @@ def SW_FDIV : FuncUnit;
  // FIXME: Add preload instruction when it is documented.
  // FIXME: Model non-pipelined nature of FP div / sqrt unit.
  
-def SwiftItineraries : ProcessorItineraries<
-  [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
-  //
-  // Move instructions, unconditional
-  InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [2]>,
-  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1]>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1]>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                                 [3]>,
-  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
-                               InstrStage<1, [SW_LS]>],
-                              [5]>,
-  //
-  // MVN instructions
-  InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  //
-  // No operand cycles
-  InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
-  //
-  // Binary Instructions that produce a result
-  InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1]>,
-  InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1, 1]>,
-  InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [2, 1, 1]>,
-  InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [2, 1, 1]>,
-  InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [2, 1, 1, 1]>,
-  //
-  // Bitwise Instructions that produce a result
-  InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1]>,
-  InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1, 1]>,
-  InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [2, 1, 1]>,
-  InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [2, 1, 1, 1]>,
-  //
-  // Unary Instructions that produce a result
-
-  // CLZ, RBIT, etc.
-  InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1]>,
-
-  // BFC, BFI, UBFX, SBFX
-  InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [2, 1]>,
-
-  //
-  // Zero and sign extension instructions
-  InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1]>,
-  InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1, 1]>,
-  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                            [1, 1, 1, 1]>,
-  //
-  // Compare instructions
-  InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1, 1]>,
-  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
-                              [1, 1]>,
-  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
-                              [1, 1, 1]>,
-  //
-  // Test instructions
-  InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1, 1]>,
-  InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
-                              [1, 1]>,
-  InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
-                              [1, 1, 1]>,
-  //
-  // Move instructions, conditional
-  // FIXME: Correctly model the extra input dep on the destination.
-  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1]>,
-  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1, 1]>,
-  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [1, 1]>,
-  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [2, 1, 1]>,
-  InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [2]>,
-
-  // Integer multiply pipeline
-  //
-  InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [3, 1, 1]>,
-  InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [3, 1, 1, 1]>,
-  InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1, 1]>,
-  InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0], 1>,
-                               InstrStage<1, [SW_ALU0], 3>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [5, 5, 1, 1]>,
-  InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0], 1>,
-                               InstrStage<1, [SW_ALU0], 1>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [5, 6, 1, 1]>,
-  //
-  // Integer divide
-  InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                             InstrStage<1, [SW_ALU0], 0>,
-                             InstrStage<14, [SW_IDIV]>],
-                            [14, 1, 1]>,
-
-  // Integer load pipeline
-  // FIXME: The timings are some rough approximations
-  //
-  // Immediate offset
-  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1]>,
-  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1]>,
-  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_LS], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 4, 1]>,
-  //
-  // Register offset
-  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1, 1]>,
-  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1, 1]>,
-  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS], 1>,
-                                 InstrStage<1, [SW_LS], 3>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                                [3, 4, 1, 1]>,
-  //
-  // Scaled register offset
-  InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                 InstrStage<1, [SW_LS]>],
-                                [5, 1, 1]>,
-  InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                 InstrStage<1, [SW_LS]>],
-                                [5, 1, 1]>,
-  //
-  // Immediate offset with update
-  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1, 1]>,
-  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1, 1]>,
-  //
-  // Register offset with update
-  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_ALU0], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1, 1, 1]>,
-  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_ALU0], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [3, 1, 1, 1]>,
-  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_DIS2], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
-                                 InstrStage<1, [SW_LS], 3>,
-                                 InstrStage<1, [SW_LS], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                                [3, 4, 1, 1]>,
-  //
-  // Scaled register offset with update
-  InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_DIS2], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                 InstrStage<1, [SW_LS], 3>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                                [5, 3, 1, 1]>,
-  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_DIS2], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                  InstrStage<1, [SW_LS], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                                [5, 3, 1, 1]>,
-  //
-  // Load multiple, def is the 5th operand.
-  // FIXME: This assumes 3 to 4 registers.
-  InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_DIS2], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS]>],
-                               [1, 1, 1, 1, 3], [], -1>, // dynamic uops
-
-  //
-  // Load multiple + update, defs are the 1st and 5th operands.
-  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_DIS2], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
-                                InstrStage<1, [SW_LS], 3>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                               [2, 1, 1, 1, 3], [], -1>, // dynamic uops
-  //
-  // Load multiple plus branch
-  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_DIS2], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS]>],
-                               [1, 1, 1, 1, 3], [], -1>, // dynamic uops
-  //
-  // Pop, def is the 3rd operand.
-  InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS]>],
-                               [1, 1, 3], [], -1>, // dynamic uops
-  //
-  // Pop + branch, def is the 3rd operand.
-  InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS]>],
-                               [1, 1, 3], [], -1>, // dynamic uops
-
-  //
-  // iLoadi + iALUr for t2LDRpci_pic.
-  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                InstrStage<1, [SW_LS], 3>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                               [4, 1]>,
-
-  // Integer store pipeline
-  ///
-  // Immediate offset
-  InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [1, 1]>,
-  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [1, 1]>,
-  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [1, 1]>,
-  //
-  // Register offset
-  InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [1, 1, 1]>,
-  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS]>],
-                                [1, 1, 1]>,
-  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
-                                 InstrStage<1, [SW_DIS1], 0>,
-                                 InstrStage<1, [SW_DIS2], 0>,
-                                 InstrStage<1, [SW_LS], 0>,
-                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                 InstrStage<1, [SW_LS]>],
-                                [1, 1, 1]>,
-  //
-  // Scaled register offset
-  InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1]>,
-  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1]>,
-  //
-  // Immediate offset with update
-  InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1]>,
-  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1]>,
-  //
-  // Register offset with update
-  InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1, 1]>,
-  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1, 1]>,
-  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
-                                  InstrStage<1, [SW_DIS1], 0>,
-                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                  InstrStage<1, [SW_LS]>],
-                                 [1, 1, 1, 1]>,
-  //
-  // Scaled register offset with update
-  InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
-                                    InstrStage<1, [SW_DIS1], 0>,
-                                    InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                    InstrStage<1, [SW_LS], 0>,
-                                    InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
-                                   [3, 1, 1, 1]>,
-  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
-                                    InstrStage<1, [SW_DIS1], 0>,
-                                    InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
-                                    InstrStage<1, [SW_LS], 0>,
-                                    InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
-                                   [3, 1, 1, 1]>,
-  //
-  // Store multiple
-  InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_DIS2], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS], 1>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS], 1>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS]>],
-                                [], [], -1>, // dynamic uops
-  //
-  // Store multiple + update
-  InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_DIS2], 0>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS], 1>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS], 1>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
-                                InstrStage<1, [SW_LS]>],
-                               [2], [], -1>, // dynamic uops
-
-  //
-  // Preload
-  InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
-
-  // Branch
-  //
-  // no delay slots, so the latency of a branch is unimportant
-  InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
-
-  // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                             [1]>,
-  //
-  // Single-precision FP Unary
-  //
-  // Most floating-point moves get issued on ALU0.
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1]>,
-  //
-  // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1]>,
-
-  //
-  // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [1, 1]>,
-  //
-  // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [1, 1]>,
-  //
-  // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-  //
-  // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-
-  //
-  // Single to Half FP Convert
-  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_ALU1], 4>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [6, 1]>,
-  //
-  // Half to Single FP Convert
-  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-
-  //
-  // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-  //
-  // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-  //
-  // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-  //
-  // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1]>,
-  //
-  // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-  //
-  // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [6, 1, 1]>,
-  //
-  // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [12, 1, 1]>,
-  //
-  // Single-precision Fused FP MAC
-  InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Double-precision Fused FP MAC
-  InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [12, 1, 1]>,
-  //
-  // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 0>,
-                               InstrStage<15, [SW_FDIV]>],
-                              [17, 1, 1]>,
-  //
-  // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 0>,
-                               InstrStage<30, [SW_FDIV]>],
-                              [32, 1, 1]>,
-  //
-  // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 0>,
-                               InstrStage<15, [SW_FDIV]>],
-                              [17, 1]>,
-  //
-  // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 0>,
-                               InstrStage<30, [SW_FDIV]>],
-                              [32, 1, 1]>,
-
-  //
-  // Integer to Single-precision Move
-  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_LS], 4>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [6, 1]>,
-  //
-  // Integer to Double-precision Move
-  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [4, 1]>,
-  //
-  // Single-precision to Integer Move
-  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [3, 1]>,
-  //
-  // Double-precision to Integer Move
-  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_LS], 3>,
-                               InstrStage<1, [SW_LS]>],
-                              [3, 4, 1]>,
-  //
-  // Single-precision FP Load
-  InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [4, 1]>,
-  //
-  // Double-precision FP Load
-  InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [4, 1]>,
-  //
-  // FP Load Multiple
-  // FIXME: Assumes a single Q register.
-  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [1, 1, 1, 4], [], -1>, // dynamic uops
-  //
-  // FP Load Multiple + update
-  // FIXME: Assumes a single Q register.
-  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_LS], 4>,
-                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                              [2, 1, 1, 1, 4], [], -1>, // dynamic uops
-  //
-  // Single-precision FP Store
-  InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [1, 1]>,
-  //
-  // Double-precision FP Store
-  InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [1, 1]>,
-  //
-  // FP Store Multiple
-  // FIXME: Assumes a single Q register.
-  InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [1, 1, 1], [], -1>, // dynamic uops
-  //
-  // FP Store Multiple + update
-  // FIXME: Assumes a single Q register.
-  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
-                                InstrStage<1, [SW_DIS1], 0>,
-                                InstrStage<1, [SW_LS], 4>,
-                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
-                               [2, 1, 1, 1], [], -1>, // dynamic uops
-  // NEON
-  //
-  // Double-register Integer Unary
-  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1]>,
-  //
-  // Quad-register Integer Unary
-  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1]>,
-  //
-  // Double-register Integer Q-Unary
-  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1]>,
-  //
-  // Quad-register Integer Q-Unary
-  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1]>,
-  //
-  // Double-register Integer Binary
-  InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Quad-register Integer Binary
-  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Double-register Integer Subtract
-  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Quad-register Integer Subtract
-  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Double-register Integer Shift
-  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Quad-register Integer Shift
-  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Double-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Double-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Double-register Integer Subtract (4 cycle)
-  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register Integer Subtract (4 cycle)
-  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-
-  //
-  // Double-register Integer Count
-  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Quad-register Integer Count
-  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1, 1]>,
-  //
-  // Double-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1, 1]>,
-  //
-  // Quad-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1, 1]>,
-  //
-  // Double-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-
-  //
-  // Double-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-
-  //
-  // Double-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-  //
-  // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1, 1]>,
-  //
-  // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1, 1]>,
-  //
-  // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1, 1]>,
-  //
-  // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1, 1]>,
-
-  //
-  // Move
-  InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1]>,
-  //
-  // Move Immediate
-  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2]>,
-  //
-  // Double-register Permute Move
-  InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1]>,
-  //
-  // Quad-register Permute Move
-  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1]>,
-  //
-  // Integer to Single-precision Move
-  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_LS], 4>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [6, 1]>,
-  //
-  // Integer to Double-precision Move
-  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [4, 1, 1]>,
-  //
-  // Single-precision to Integer Move
-  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_LS]>],
-                              [3, 1]>,
-  //
-  // Double-precision to Integer Move
-  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_LS], 3>,
-                               InstrStage<1, [SW_LS]>],
-                              [3, 4, 1]>,
-  //
-  // Integer to Lane Move
-  // FIXME: I think this is correct, but it is not clear from the tuning guide.
-  InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_LS], 4>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [6, 1]>,
-
-  //
-  // Vector narrow move
-  InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1]>,
-  //
-  // Double-register FP Unary
-  // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
-  //        and they issue on a different pipeline.
-  InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1]>,
-  //
-  // Quad-register FP Unary
-  // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
-  //        and they issue on a different pipeline.
-  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [2, 1]>,
-  //
-  // Double-register FP Binary
-  // FIXME: We're using this itin for many instructions.
-  InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-
-  //
-  // VPADD, etc.
-  InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Double-register FP VMUL
-  InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register FP Binary
-  InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU0]>],
-                              [4, 1, 1]>,
-  //
-  // Quad-register FP VMUL
-  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 1]>,
-  //
-  // Double-register FP Multiply-Accumulate
-  InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Quad-register FP Multiply-Accumulate
-  InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Double-register Fused FP Multiply-Accumulate
-  InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Quad-register Fused FP Multiply-Accumulate
-  InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Double-register Reciprocal Step
-  InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Quad-register Reciprocal Step
-  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 1]>,
-  //
-  // Double-register Permute
-  // FIXME: The latencies are unclear from the documentation.
-  InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [3, 4, 3, 4]>,
-  //
-  // Quad-register Permute
-  // FIXME: The latencies are unclear from the documentation.
-  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [3, 4, 3, 4]>,
-  //
-  // Quad-register Permute (3 cycle issue on A9)
-  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [3, 4, 3, 4]>,
-
-  //
-  // Double-register VEXT
-  InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1, 1]>,
-  //
-  // Quad-register VEXT
-  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1, 1]>,
-  //
-  // VTB
-  InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1, 1]>,
-  InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 3, 3]>,
-  InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [6, 1, 3, 5, 5]>,
-  InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 3, 5, 7, 7]>,
-  //
-  // VTBX
-  InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [2, 1, 1]>,
-  InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [4, 1, 3, 3]>,
-  InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [6, 1, 3, 5, 5]>,
-  InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
-                               InstrStage<1, [SW_DIS1], 0>,
-                               InstrStage<1, [SW_DIS2], 0>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1], 2>,
-                               InstrStage<1, [SW_ALU1]>],
-                              [8, 1, 3, 5, 7, 7]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simple machine model which
-// will replace itineraries.
-
  // Swift machine model for scheduling and other instruction cost heuristics.
  def SwiftModel : SchedMachineModel {
    let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
    let MicroOpBufferSize = 45; // Based on NEON renamed registers.
    let LoadLatency = 3;
    let MispredictPenalty = 14; // A branch direction mispredict.
-
-  let Itineraries = SwiftItineraries;
  }
  
  // Swift predicates.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp

index 31b65f2bfec4db593fa687bae0dcfa8d668c4aee..3180480986d64c53532e27e5a52411ebf40f88de 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -319,8 +319,19 @@ bool ARMSubtarget::hasSinCos() const {
    return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
  }
  
+bool ARMSubtarget::enableMachineScheduler() const {
+  // Enable the MachineScheduler before register allocation for out-of-order
+  // architectures where we do not use the PostRA scheduler anymore (for now
+  // restricted to swift).
+  return getSchedModel().isOutOfOrder() && isSwift();
+}
+
  // This overrides the PostRAScheduler bit in the SchedModel for any CPU.
  bool ARMSubtarget::enablePostRAScheduler() const {
+  // No need for PostRA scheduling on out of order CPUs (for now restricted to
+  // swift).
+  if (getSchedModel().isOutOfOrder() && isSwift())
+    return false;
    return (!isThumb() || hasThumb2());
  }
  
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h

index 75425890a28348dd66995dab4fd4a186676cfd0d..4f9bc372e4b19f93aad4373176993196c8f0dc2b 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -433,6 +433,9 @@ public:
    /// compiler runtime or math libraries.
    bool hasSinCos() const;
  
+  /// Returns true if machine scheduler should be enabled.
+  bool enableMachineScheduler() const override;
+
    /// True for some subtargets at > -O0.
    bool enablePostRAScheduler() const override;
  
diff --git a/test/CodeGen/ARM/adv-copy-opt.ll b/test/CodeGen/ARM/adv-copy-opt.ll

index f71bf78b62c4b0f69f09b29b9a61b80b0d192a1d..395be3457203bba3a018fa8107800a9202adc411 100644 (file)
--- a/test/CodeGen/ARM/adv-copy-opt.ll
+++ b/test/CodeGen/ARM/adv-copy-opt.ll
@@ -11,25 +11,25 @@
  ; r0 = r0 / r2
  ; r1 = r1 / r3
  ;
-; NOOPT: vmov  [[B:d[0-9]+]], r2, r3
-; NOOPT-NEXT: vmov     [[A:d[0-9]+]], r0, r1
+; NOOPT: vmov  [[A:d[0-9]+]], r0, r1
+; NOOPT-NEXT: vmov     [[B:d[0-9]+]], r2, r3
  ; Move the low part of B into a register.
  ; Unfortunately, we cannot express that the 's' register is the low
  ; part of B, i.e., sIdx == BIdx x 2. E.g., B = d1, B_low = s2.
  ; NOOPT-NEXT: vmov     [[B_LOW:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: vmov     [[A_LOW:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: udiv     [[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
  ; NOOPT-NEXT: vmov     [[B_HIGH:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: vmov     [[A_LOW:r[0-9]+]], s{{[0-9]+}}
  ; NOOPT-NEXT: vmov     [[A_HIGH:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: udiv     [[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
+; NOOPT-NEXT: udiv     [[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
  ; NOOPT-NEXT: vmov.32  [[RES:d[0-9]+]][0], [[RES_LOW]]
+; NOOPT-NEXT: udiv     [[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
  ; NOOPT-NEXT: vmov.32  [[RES]][1], [[RES_HIGH]]
  ; NOOPT-NEXT: vmov     r0, r1, [[RES]]
  ; NOOPT-NEXT: bx       lr
  ;
  ; OPT-NOT: vmov
-; OPT:         udiv    r0, r0, r2
-; OPT-NEXT: udiv       r1, r1, r3
+; OPT: udiv    r1, r1, r3
+; OPT-NEXT:    udiv    r0, r0, r2
  ; OPT-NEXT: bx lr
  define <2 x i32> @simpleVectorDiv(<2 x i32> %A, <2 x i32> %B) nounwind {
  entry:
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll

index c3de07e03b6b31b73beca190d96c66a76ce36a97..79e8e68e2f57cf1573faa5ad4442082f718e3feb 100644 (file)
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift     | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT
  ; Avoid some 's' 16-bit instruction which partially update CPSR (and add false
  ; dependency) when it isn't dependent on last CPSR defining instruction.
  ; rdar://8928208
@@ -7,8 +7,10 @@
  define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
   entry:
  ; CHECK-LABEL: t1:
-; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
-; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-CORTEX: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-CORTEX-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-SWIFT: muls  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-SWIFT-NEXT: mul [[REG:(r[0-9]+)]], r2, r3
  ; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
    %0 = mul nsw i32 %a, %b
    %1 = mul nsw i32 %c, %d
@@ -21,8 +23,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
  entry:
  ; CHECK-LABEL: t2:
-  %tobool7 = icmp eq i32* %ptr2, null
-  br i1 %tobool7, label %while.end, label %while.body
+  br label %while.body
  
  while.body:
  ; CHECK: while.body
@@ -55,8 +56,7 @@ while.end:
  define void @t3(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind minsize {
  entry:
  ; CHECK-LABEL: t3:
-  %tobool7 = icmp eq i32* %ptr2, null
-  br i1 %tobool7, label %while.end, label %while.body
+  br label %while.body
  
  while.body:
  ; CHECK: while.body
diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll

index fb88575cab3b3c1541d4ba2ca81ec694e26fba24..81e05acfef7955e4b4662eb900db1d95e52e27bd 100644 (file)
--- a/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -15,14 +15,14 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
  ; CHECK: bne [[LOOP]]
  
  ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs r0, #1
  ; CHECK: dmb ish
+; CHECK: movs r0, #1
  ; CHECK: bx lr
  
  ; CHECK: [[FAILED]]:
  ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs r0, #0
  ; CHECK: dmb ish
+; CHECK: movs r0, #0
  ; CHECK: bx lr
  
    %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
@@ -34,8 +34,8 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
  define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
  ; CHECK-LABEL: test_return_bool:
  
-; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
  ; CHECK: dmb ishst
+; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
  
  ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
  ; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll

index 377996c4c3c8be205add3cef35450bc6f684a9b4..77d0f30485df9e10cf4145f9c7aab9eda6cda155 100644 (file)
--- a/test/CodeGen/ARM/test-sharedidx.ll
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -20,8 +20,8 @@ entry:
  
  for.body:                                         ; preds = %entry, %for.body.3
  ; CHECK: %for.body
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
    %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
    %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
    %0 = load i8, i8* %arrayidx, align 1
@@ -42,8 +42,8 @@ for.end:                                          ; preds = %for.body, %for.body
  
  for.body.1:                                       ; preds = %for.body
  ; CHECK: %for.body.1
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
    %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
    %2 = load i8, i8* %arrayidx.1, align 1
    %conv6.1 = zext i8 %2 to i32
@@ -60,8 +60,8 @@ for.body.1:                                       ; preds = %for.body
  
  for.body.2:                                       ; preds = %for.body.1
  ; CHECK: %for.body.2
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
    %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
    %4 = load i8, i8* %arrayidx.2, align 1
    %conv6.2 = zext i8 %4 to i32
@@ -78,8 +78,8 @@ for.body.2:                                       ; preds = %for.body.1
  
  for.body.3:                                       ; preds = %for.body.2
  ; CHECK: %for.body.3
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
    %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
    %6 = load i8, i8* %arrayidx.3, align 1
    %conv6.3 = zext i8 %6 to i32
diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll

index 17f134f458a2a66f857a9e5baa8bf89aa183a0c4..a638c2bdb9beddec6ebe3741b7347c591bca6135 100644 (file)
--- a/test/CodeGen/ARM/vector-load.ll
+++ b/test/CodeGen/ARM/vector-load.ll
@@ -238,12 +238,12 @@ define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
  
  define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
  ;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
-;CHECK: ldr.w   r[[PTRREG:[0-9]+]], [r0]
+;CHECK: ldr   r[[PTRREG:[0-9]+]], [r0]
  ;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
  ;CHECK: add.w   r[[INCREG:[0-9]+]], r[[PTRREG]], #16
-;CHECK: str.w   r[[INCREG]], [r0]
  ;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
  ;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: str   r[[INCREG]], [r0]
         %A = load <4 x i8>*, <4 x i8>** %ptr
         %lA = load <4 x i8>, <4 x i8>* %A, align 4
         %inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll

index 30baa9a20ddc127d650a5f686419bf076c48a831..161bbf1d0fde85c16417d8776be3dc00b76bddd8 100644 (file)
--- a/test/CodeGen/ARM/vector-store.ll
+++ b/test/CodeGen/ARM/vector-store.ll
@@ -228,9 +228,9 @@ define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
  ;CHECK: ldr.w   r9, [sp]
  ;CHECK: vmov    {{d[0-9]+}}, r3, r9
  ;CHECK: vmov    {{d[0-9]+}}, r1, r2
+;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
  ;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
  ;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
  ;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
         %A = load <4 x i8>*, <4 x i8>** %ptr
          %trunc = trunc <4 x i32> %val to <4 x i8>
@@ -243,10 +243,10 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val)
  ;CHECK: ldr.w   r9, [sp]
  ;CHECK: vmov    {{d[0-9]+}}, r3, r9
  ;CHECK: vmov    {{d[0-9]+}}, r1, r2
-;CHECK: movs    [[IMM16:r[0-9]+]], #16
+;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
  ;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
  ;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
+;CHECK: movs    [[IMM16:r[0-9]+]], #16
  ;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
  ;CHECK: str     r[[PTRREG]], [r0]
         %A = load <4 x i8>*, <4 x i8>** %ptr
author	Matthias Braun <matze@braunis.de>
	Fri, 17 Jul 2015 01:44:31 +0000 (01:44 +0000)
committer	Matthias Braun <matze@braunis.de>
	Fri, 17 Jul 2015 01:44:31 +0000 (01:44 +0000)
include/llvm/MC/MCSchedule.h		patch \| blob \| history
lib/Target/ARM/ARMScheduleSwift.td		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.cpp		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.h		patch \| blob \| history
test/CodeGen/ARM/adv-copy-opt.ll		patch \| blob \| history
test/CodeGen/ARM/avoid-cpsr-rmw.ll		patch \| blob \| history
test/CodeGen/ARM/cmpxchg-idioms.ll		patch \| blob \| history
test/CodeGen/ARM/test-sharedidx.ll		patch \| blob \| history
test/CodeGen/ARM/vector-load.ll		patch \| blob \| history
test/CodeGen/ARM/vector-store.ll		patch \| blob \| history