ARM cost model: Unaligned vectorized double stores are expensive
[oota-llvm.git] / lib / Target / ARM / ARMTargetTransformInfo.cpp
index 86b6215f09e0b2e4852633af2e511762650d28e6..6bbb38facc24020098253e57792b3fefb8fd006a 100644 (file)
@@ -129,6 +129,9 @@ public:
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                   OperandValueKind Op1Info = OK_AnyValue,
                                   OperandValueKind Op2Info = OK_AnyValue) const;
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                           unsigned AddressSpace) const;
   /// @}
 };
 
@@ -540,3 +543,15 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
   return Cost;
 }
 
+/// Memory-op cost: TLI's legalization cost (.first) for \p Src, times 4 when
+/// \p Src is a vector of doubles and \p Alignment is any value other than 16.
+unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                 unsigned AddressSpace) const {
+  const unsigned LegalizedCost = TLI->getTypeLegalizationCost(Src).first;
+  const bool IsDoubleVector =
+      Src->isVectorTy() && Src->getVectorElementType()->isDoubleTy();
+  // Unaligned loads/stores are extremely inefficient: vst.1/vld.1 needs
+  // 4 uops where vldr/vstr needs 1 uop, so scale the cost to match.
+  const unsigned UopFactor = (IsDoubleVector && Alignment != 16) ? 4 : 1;
+  return LegalizedCost * UopFactor;
+}