Re-commit 117518 and 117519 now that ARM MC test failures are out of the way.
author Evan Cheng <evan.cheng@apple.com>
Thu, 28 Oct 2010 06:47:08 +0000 (06:47 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Thu, 28 Oct 2010 06:47:08 +0000 (06:47 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117531 91177308-0d34-0410-b5e6-96231b3b80d8

12 files changed:
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/ARMInstrThumb2.td
lib/Target/ARM/ARMScheduleA9.td
test/CodeGen/ARM/fabss.ll
test/CodeGen/ARM/fadds.ll
test/CodeGen/ARM/fdivs.ll
test/CodeGen/ARM/fmacs.ll
test/CodeGen/ARM/fmscs.ll
test/CodeGen/ARM/fmuls.ll
test/CodeGen/ARM/shifter_operand.ll

index 0ffb4da0f3613d3dc044094f42f7629526cd533f..d34a52d80144997a85a1d7939dbef19eb883fdc6 100644 (file)
@@ -454,6 +454,9 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
     return;
 
   unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+  if (Use->isMachineOpcode())
+    // Adjust the use operand index by num of defs.
+    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
   int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
   if (Latency >= 0)
     dep.setLatency(Latency);
index 1c89b97b817f160aee37a016d826176ca0539f14..51db6775817b4acd9c395514ba313ccdaef47cef 100644 (file)
@@ -1823,8 +1823,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   // This may be a def / use of a variable_ops instruction, the operand
   // latency might be determinable dynamically. Let the target try to
   // figure it out.
-  bool LdmBypass = false;
   int DefCycle = -1;
+  bool LdmBypass = false;
   switch (DefTID.getOpcode()) {
   default:
     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1922,8 +1922,38 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     ? (*DefMI->memoperands_begin())->getAlignment() : 0;
   unsigned UseAlign = UseMI->hasOneMemOperand()
     ? (*UseMI->memoperands_begin())->getAlignment() : 0;
-  return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
-                           UseTID, UseIdx, UseAlign);
+  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+                                  UseTID, UseIdx, UseAlign);
+
+  if (Latency > 1 &&
+      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+    // variants are one cycle cheaper.
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::LDRrs:
+    case ARM::LDRBrs: {
+      unsigned ShOpVal = DefMI->getOperand(3).getImm();
+      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+      if (ShImm == 0 ||
+          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+        --Latency;
+      break;
+    }
+    case ARM::t2LDRs:
+    case ARM::t2LDRBs:
+    case ARM::t2LDRHs:
+    case ARM::t2LDRSHs: {
+      // Thumb2 mode: lsl only.
+      unsigned ShAmt = DefMI->getOperand(3).getImm();
+      if (ShAmt == 0 || ShAmt == 2)
+        --Latency;
+      break;
+    }
+    }
+  }
+
+  return Latency;
 }
 
 int
@@ -1947,8 +1977,40 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
   unsigned UseAlign = !UseMN->memoperands_empty()
     ? (*UseMN->memoperands_begin())->getAlignment() : 0;
-  return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
-                           UseTID, UseIdx, UseAlign);
+  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+                                  UseTID, UseIdx, UseAlign);
+
+  if (Latency > 1 &&
+      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+    // variants are one cycle cheaper.
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::LDRrs:
+    case ARM::LDRBrs: {
+      unsigned ShOpVal =
+        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+      if (ShImm == 0 ||
+          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+        --Latency;
+      break;
+    }
+    case ARM::t2LDRs:
+    case ARM::t2LDRBs:
+    case ARM::t2LDRHs:
+    case ARM::t2LDRSHs: {
+      // Thumb2 mode: lsl only.
+      unsigned ShAmt =
+        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+      if (ShAmt == 0 || ShAmt == 2)
+        --Latency;
+      break;
+    }
+    }
+  }
+
+  return Latency;
 }
 
 bool ARMBaseInstrInfo::
index 0974890e9562ce4c42a89f7063b6ab60cbfa520e..ffd496206836367887670acbc2347a3e1f76059b 100644 (file)
@@ -1438,13 +1438,13 @@ def RFE  : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
 // Load
 
 
-defm LDR  : AI_ldr1<0, "ldr", IIC_iLoad_i, IIC_iLoad_r,
+defm LDR  : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
                     UnOpFrag<(load node:$Src)>>;
-defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
                     UnOpFrag<(zextloadi8 node:$Src)>>;
-defm STR  : AI_str1<0, "str", IIC_iStore_i, IIC_iStore_r,
+defm STR  : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
                    BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm STRB : AI_str1<1, "strb", IIC_iStore_bh_i, IIC_iStore_bh_r,
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
                    BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
 
 // Special LDR for loads from non-pc-relative constpools.
index a209fb5412af1e77197967405cb9fe945ff10b25..21b834731e2221ff38582444f9d5ac7ef8c03906 100644 (file)
@@ -574,7 +574,7 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc,
 
 /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
 multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
-                  InstrItinClass iii, InstrItinClass iir, PatFrag opnode> {
+                  InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
   def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), iii,
                    opc, ".w\t$dst, $addr",
                    [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> {
@@ -599,7 +599,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
     let Inst{10} = 1; // The P bit.
     let Inst{8} = 0; // The W bit.
   }
-  def s   : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iir,
+  def s   : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iis,
                    opc, ".w\t$dst, $addr",
                    [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> {
     let Inst{31-27} = 0b11111;
@@ -626,7 +626,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
 
 /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
 multiclass T2I_st<bits<2> opcod, string opc,
-                  InstrItinClass iii, InstrItinClass iir, PatFrag opnode> {
+                  InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
   def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), iii,
                    opc, ".w\t$src, $addr",
                    [(opnode GPR:$src, t2addrmode_imm12:$addr)]> {
@@ -647,7 +647,7 @@ multiclass T2I_st<bits<2> opcod, string opc,
     let Inst{10} = 1; // The P bit.
     let Inst{8} = 0; // The W bit.
   }
-  def s   : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iir,
+  def s   : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iis,
                    opc, ".w\t$src, $addr",
                    [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> {
     let Inst{31-27} = 0b11111;
@@ -916,19 +916,19 @@ def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
 
 // Load
 let canFoldAsLoad = 1, isReMaterializable = 1  in
-defm t2LDR   : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_r,
+defm t2LDR   : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
                       UnOpFrag<(load node:$Src)>>;
 
 // Loads with zero extension
-defm t2LDRH  : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+defm t2LDRH  : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
                       UnOpFrag<(zextloadi16 node:$Src)>>;
-defm t2LDRB  : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+defm t2LDRB  : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
                       UnOpFrag<(zextloadi8  node:$Src)>>;
 
 // Loads with sign extension
-defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
                       UnOpFrag<(sextloadi16 node:$Src)>>;
-defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
                       UnOpFrag<(sextloadi8  node:$Src)>>;
 
 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
@@ -1070,11 +1070,11 @@ def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
 def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
 
 // Store
-defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_r,
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
                    BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_r,
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
                    BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_r,
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
                    BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
 
 // Store doubleword
index 548bc7cd60300cab2e1c38ae1e26f59d6d806e38..20aa64163bf41bfbc20acaaacd408cca6afe5583 100644 (file)
@@ -574,7 +574,7 @@ def CortexA9Itineraries : ProcessorItineraries<
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe]>],
-                              [8, 0, 1, 1]>,
+                              [8, 1, 1, 1]>,
   //
   // Double-precision FP MAC
   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
@@ -582,7 +582,7 @@ def CortexA9Itineraries : ProcessorItineraries<
                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
                                InstrStage<2,  [A9_NPipe]>],
-                              [9, 0, 1, 1]>,
+                              [9, 1, 1, 1]>,
   //
   // Single-precision FP DIV
   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
index dfc1e0a957c3de668fb8a6cd7f260d8476ab1718..f03282bdab7f4c4020dd55194f6c815c9a27309b 100644 (file)
@@ -24,4 +24,4 @@ declare float @fabsf(float)
 ; CORTEXA8: test:
 ; CORTEXA8:    vabs.f32        d1, d1
 ; CORTEXA9: test:
-; CORTEXA9:    vabs.f32        s0, s0
+; CORTEXA9:    vabs.f32        s1, s1
index 113f0e29bd15800e00b1726c788d18d7bd87cf33..749690e98d0f4fb638a2b9b2cabb9780b8a3df59 100644 (file)
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8:    vadd.f32        d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9:    vadd.f32        s0, s0, s1
+; CORTEXA9:    vadd.f32        s0, s1, s0
index 9af1217de1d080862b9bd2221bd46519f6e9ca73..0c31495792979b2a62c008fe10a35b4eaa288cad 100644 (file)
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8:    vdiv.f32        s0, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9:    vdiv.f32        s0, s0, s1
+; CORTEXA9:    vdiv.f32        s0, s1, s0
index c4ceca9828b06d50c0462fe5dcfda60d8a48c6de..f8b47b5bac0deea64047fc2eeb05561f6fa82c3a 100644 (file)
@@ -21,4 +21,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8:    vmul.f32        d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9:    vmla.f32        s0, s1, s2
+; CORTEXA9:    vmla.f32        s2, s1, s0
index 19359a1ae6bcb9400debd9d5eb968d31e3c1ca80..7a70543dee6cf4b96891d5a1a09b4170fdb92627 100644 (file)
@@ -19,6 +19,6 @@ entry:
 ; NFP0:        vnmls.f32       s2, s1, s0
 
 ; CORTEXA8: test:
-; CORTEXA8:    vnmls.f32       s1, s2, s0
+; CORTEXA8:    vnmls.f32       s2, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9:    vnmls.f32       s0, s1, s2
+; CORTEXA9:    vnmls.f32       s2, s1, s0
index bfafd20c8602c51e5dbc6adf60a1b1e2fa1b5c80..ef4e3e52818e84cc779dfcdba10d377ff2c46d9c 100644 (file)
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8:    vmul.f32        d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9:    vmul.f32        s0, s0, s1
+; CORTEXA9:    vmul.f32        s0, s1, s0
index 897fb1af01cf59b2676764f172011d196e0a6b68..01e3a922f65681cd0fa8f2612023c4d5b74282d3 100644 (file)
@@ -36,8 +36,8 @@ entry:
 
 ; lsl #2 is free
 ; A9: test3:
-; A9: ldr r1, [r1, r2, lsl #2]
 ; A9: ldr r0, [r0, r2, lsl #2]
+; A9: ldr r1, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*