+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
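+ // On Cortex-A9, the NEON load pseudos below pay an extra cycle when the
+ // source address is not 64-bit aligned (see the comment at the end of the
+ // switch); bump their latency accordingly.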
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latency of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
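+ // Copies and the copy-like pseudos (INSERT_SUBREG, REG_SEQUENCE,
+ // IMPLICIT_DEF) are either coalesced away or become simple moves; treat
+ // them as unit latency.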
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Class = MCID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, which apparently increases their latencies.
+ *PredCost = 1;
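+ // If the itinerary specifies a fixed micro-op count, its stage latency is
+ // meaningful; otherwise fall back to the dynamically computed micro-op
+ // count (e.g. for load/store multiples) as a latency estimate.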
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
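+ // The Q-register load/store multiple pseudos transfer a pair of D
+ // registers; use a fixed latency of 2.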
+ case ARM::VLDMQIA:
+ case ARM::VSTMQIA:
+ return 2;
+ }