From 26cc5bea3921f373be326e750bffd9f9a6d23313 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 2 Dec 2014 21:36:24 +0000 Subject: [PATCH] [AArch64][Stackmaps] Optimize stackmap shadows on AArch64. Reduce the number of nops emitted for stackmap shadows on AArch64 by counting non-stackmap instructions up to the next branch target towards the requested shadow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64AsmPrinter.cpp | 17 ++++++++++++++++- test/CodeGen/AArch64/arm64-stackmap-nops.ll | 15 +++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AArch64/arm64-stackmap-nops.ll diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 8bee4f51351..08ee687d84a 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -381,8 +381,23 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, unsigned NumNOPBytes = MI.getOperand(1).getImm(); SM.recordStackMap(MI); - // Emit padding. assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + + // Scan ahead to trim the shadow. + const MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::const_iterator MII(MI); + ++MII; + while (NumNOPBytes > 0) { + if (MII == MBB.end() || MII->isCall() || + MII->getOpcode() == AArch64::DBG_VALUE || + MII->getOpcode() == TargetOpcode::PATCHPOINT || + MII->getOpcode() == TargetOpcode::STACKMAP) + break; + ++MII; + NumNOPBytes -= 4; + } + + // Emit nops. for (unsigned i = 0; i < NumNOPBytes; i += 4) EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } diff --git a/test/CodeGen/AArch64/arm64-stackmap-nops.ll b/test/CodeGen/AArch64/arm64-stackmap-nops.ll new file mode 100644 index 00000000000..5915b64edf0 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-stackmap-nops.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s + +define void @test_shadow_optimization() { +entry: +; Expect 8 bytes worth of nops here rather than 16: With the shadow optimization +; in place, 8 bytes will be consumed by the frame teardown and return instr. +; CHECK-LABEL: test_shadow_optimization: +; CHECK: nop +; CHECK-NEXT: nop +; CHECK-NOT: nop + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 16) + ret void +} + +declare void @llvm.experimental.stackmap(i64, i32, ...) -- 2.34.1