From efe7d9a12f441a256d67c4e4da494dcefca678a5 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 10 Sep 2010 05:15:04 +0000 Subject: [PATCH] Fix merging base-updates for VLDM/VSTM: Before I switched these instructions to use AddrMode4, there was a count of the registers stored in one of the operands. I changed that to just count the operands but forgot to adjust for the size of D registers. This was noticed by Evan as a performance problem but it is a potential correctness bug as well, since it is possible that this could merge a base update with a non-matching immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 3 ++- test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index d1acacfb1d0..2b7645a4211 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -458,9 +458,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2STM: case ARM::VLDMS: case ARM::VSTMS: + return (MI->getNumOperands() - 4) * 4; case ARM::VLDMD: case ARM::VSTMD: - return (MI->getNumOperands() - 4) * 4; + return (MI->getNumOperands() - 4) * 8; } } diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 81483cb4e7c..ee63656b26d 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1] +; CHECK: dct_luma_sp: define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) { entry: +; Make sure to use base-updating stores for saving callee-saved registers. +; CHECK-NOT: sub sp +; CHECK: vstmdb sp! %predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1] br label %cond_next489 -- 2.34.1