Fixing MinSize attribute handling was discussed in D11363.
This is a prerequisite patch to doing that.
The handling of OptSize when lowering mem* functions was broken
on Darwin: Darwin wants to ignore -Os for these cases, but the
existing logic also made it ignore -Oz (MinSize).
The Linux change demonstrates a widespread problem. The backend
doesn't usually recognize the MinSize attribute by itself; it
assumes that if the MinSize attribute exists, then the OptSize
attribute must also exist.
Fixing this more generally will be a follow-on patch or two.
Differential Revision: http://reviews.llvm.org/D11568
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243693 91177308-0d34-0410-b5e6-96231b3b80d8
return true;
}
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // Returns true when MF's function attributes (and target OS) ask for size-oriented lowering.
+ const Function *F = MF.getFunction();
+ bool HasMinSize = F->hasFnAttribute(Attribute::MinSize); // -Oz
+ bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize); // -Os
+
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return HasMinSize;
+ return HasOptSize || HasMinSize; // Non-Darwin: either attribute is enough; MinSize is tested on its own, not assumed to imply OptimizeForSize.
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
computeRegisterProperties(Subtarget->getRegisterInfo());
- // On Darwin, -Os means optimize for size without hurting performance,
- // do not reduce the limit.
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmoveOptSize = 4;
setPrefLoopAlignment(4); // 2^4 bytes.
// Predictable cmov don't hurt on atom because it's in-order.
; DARWIN: movq
}
-; FIXME: Both Linux and Darwin should lower to a memcpy call; minsize is on.
define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
; LINUX-LABEL: test3_minsize:
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
-; LINUX: movq
+; LINUX: memcpy
; DARWIN-LABEL: test3_minsize:
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
+; DARWIN: memcpy
}
-; FIXME: Darwin should lower to a memcpy call; minsize is on.
define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone {
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
ret void
; LINUX: memcpy
; DARWIN-LABEL: test3_minsize_optsize:
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
-; DARWIN: movq
+; DARWIN: memcpy
}
; Large constant memcpy's should be inlined when not optimizing for size.