From dcdd1c138cc6e10a7db98002f367331b1446f17d Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Wed, 25 Nov 2015 16:55:01 +0000
Subject: [PATCH] [WebAssembly] Support for register stackifying with load and
 store instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254076 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/WebAssembly/CMakeLists.txt         |   3 +
 lib/Target/WebAssembly/README.txt             |   2 +-
 lib/Target/WebAssembly/WebAssembly.h          |   4 +
 .../WebAssembly/WebAssemblyISelLowering.h     |   1 +
 .../WebAssemblyOptimizeReturned.cpp           |  73 +++++++++++++
 .../WebAssembly/WebAssemblyPeephole.cpp       |  77 +++++++++++++
 .../WebAssembly/WebAssemblyRegStackify.cpp    |  74 ++++++++-----
 .../WebAssembly/WebAssemblyStoreResults.cpp   | 102 ++++++++++++++++++
 .../WebAssembly/WebAssemblyTargetMachine.cpp  |   7 ++
 test/CodeGen/WebAssembly/cfg-stackify.ll      |   2 +-
 test/CodeGen/WebAssembly/global.ll            |   4 +-
 test/CodeGen/WebAssembly/load-ext.ll          |  40 +++----
 test/CodeGen/WebAssembly/load-store-i1.ll     |  16 +--
 test/CodeGen/WebAssembly/load.ll              |  19 ++--
 test/CodeGen/WebAssembly/memory-addr32.ll     |   5 +-
 test/CodeGen/WebAssembly/memory-addr64.ll     |   5 +-
 test/CodeGen/WebAssembly/reg-stackify.ll      |  47 ++++++++
 test/CodeGen/WebAssembly/returned.ll          |  35 ++++++
 18 files changed, 439 insertions(+), 77 deletions(-)
 create mode 100644 lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyPeephole.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
 create mode 100644 test/CodeGen/WebAssembly/reg-stackify.ll
 create mode 100644 test/CodeGen/WebAssembly/returned.ll

diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
index 82f72269d0d..6a5894958e3 100644
--- a/lib/Target/WebAssembly/CMakeLists.txt
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -20,11 +20,14 @@ add_llvm_target(WebAssemblyCodeGen
   WebAssemblyInstrInfo.cpp
   WebAssemblyMachineFunctionInfo.cpp
   WebAssemblyMCInstLower.cpp
+  WebAssemblyOptimizeReturned.cpp
+  WebAssemblyPeephole.cpp
   WebAssemblyRegisterInfo.cpp
   WebAssemblyRegColoring.cpp
   WebAssemblyRegNumbering.cpp
   WebAssemblyRegStackify.cpp
   WebAssemblySelectionDAGInfo.cpp
+  WebAssemblyStoreResults.cpp
   WebAssemblySubtarget.cpp
   WebAssemblyTargetMachine.cpp
   WebAssemblyTargetTransformInfo.cpp
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
index 8604528b2fc..bfb124d504e 100644
--- a/lib/Target/WebAssembly/README.txt
+++ b/lib/Target/WebAssembly/README.txt
@@ -22,7 +22,7 @@ Interesting work that remains to be done:
 
 //===---------------------------------------------------------------------===//
 
-set_local and store instructions have a return value. We should (a) model this,
+set_local instructions have a return value. We should (a) model this,
 and (b) write optimizations which take advantage of it. Keep in mind that many
 set_local instructions are implicit!
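[Editor's note, not part of the original patch: register stackification marks a def whose single use can consume it directly off WebAssembly's implicit expression stack, so the asm printer emits a $pushN=/$popN pair instead of a numbered local. Before this change the pass refused to touch loads and stores; with the new IsSafeToMove check, a simple load-and-return such as the ldi32 test below goes roughly from

    i32.load $1=, $0
    return $1

to

    i32.load $push0=, $0
    return $pop0

The register numbers here are illustrative; the tests only pin down the $push/$pop pairing, not the exact numbering.]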
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
index 59856de9553..001f9f9d4a7 100644
--- a/lib/Target/WebAssembly/WebAssembly.h
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -23,13 +23,17 @@ namespace llvm {
 class WebAssemblyTargetMachine;
 class FunctionPass;
 
+FunctionPass *createWebAssemblyOptimizeReturned();
+
 FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel);
 
+FunctionPass *createWebAssemblyStoreResults();
 FunctionPass *createWebAssemblyRegStackify();
 FunctionPass *createWebAssemblyRegColoring();
 FunctionPass *createWebAssemblyCFGStackify();
 FunctionPass *createWebAssemblyRegNumbering();
+FunctionPass *createWebAssemblyPeephole();
 
 FunctionPass *createWebAssemblyRelooper();
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 3621571151a..af5eab671f2 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -74,6 +74,7 @@ private:
   // Custom lowering hooks.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
 };
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
new file mode 100644
index 00000000000..dea419c5975
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -0,0 +1,73 @@
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-returned"
+
+namespace {
+class OptimizeReturned final : public FunctionPass,
+                               public InstVisitor<OptimizeReturned> {
+  const char *getPassName() const override {
+    return "WebAssembly Optimize Returned";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  DominatorTree *DT;
+
+public:
+  static char ID;
+  OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+
+  void visitCallSite(CallSite CS);
+};
+} // End anonymous namespace
+
+char OptimizeReturned::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
+  return new OptimizeReturned();
+}
+
+void OptimizeReturned::visitCallSite(CallSite CS) {
+  for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
+    if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+      Instruction *Inst = CS.getInstruction();
+      Value *Arg = CS.getArgOperand(i);
+      // Like replaceDominatedUsesWith but using Instruction/Use dominance.
+      for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
+        Use &U = *UI++;
+        if (DT->dominates(Inst, U))
+          U.set(Inst);
+      }
+    }
+}
+
+bool OptimizeReturned::runOnFunction(Function &F) {
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  visit(F);
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 00000000000..139956225b9
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,77 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+namespace {
+class WebAssemblyPeephole final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly late peephole optimizer";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID;
+  WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyPeephole::ID = 0;
+FunctionPass *llvm::createWebAssemblyPeephole() {
+  return new WebAssemblyPeephole();
+}
+
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64: {
+        // Store instructions return their value operand. If we ended up using
+        // the same register for both, replace it with a dead def so that it
+        // can use $discard instead.
+        MachineOperand &MO = MI.getOperand(0);
+        unsigned OldReg = MO.getReg();
+        if (OldReg == MI.getOperand(2).getReg()) {
+          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+          MO.setReg(NewReg);
+          MO.setIsDead();
+          MFI.stackifyVReg(NewReg);
+        }
+      }
+      }
+
+  return Changed;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 5ba7c314908..ba2a0e20b2b 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -23,6 +23,7 @@
 #include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -40,6 +41,7 @@ class WebAssemblyRegStackify final : public MachineFunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<MachineBlockFrequencyInfo>();
     AU.addPreservedID(MachineDominatorsID);
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -70,6 +72,24 @@ static void ImposeStackOrdering(MachineInstr *MI) {
                                              /*isImp=*/true));
 }
 
+// Test whether it's safe to move Def to just before Insert. Note that this
+// doesn't account for physical register dependencies, because WebAssembly
+// doesn't have any (other than special ones like EXPR_STACK).
+// TODO: Compute memory dependencies in a way that doesn't require always
+// walking the block.
+// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
+// more precise.
+static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+                         AliasAnalysis &AA) {
+  bool SawStore = false, SawSideEffects = false;
+  MachineBasicBlock::const_iterator D(Def), I(Insert);
+  for (--I; I != D; --I)
+    SawSideEffects |= I->isSafeToMove(&AA, SawStore);
+
+  return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
+         !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+}
+
 bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** Register Stackifying **********\n"
                   "********** Function: "
@@ -78,6 +98,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
   MachineRegisterInfo &MRI = MF.getRegInfo();
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   // Walk the instructions from the bottom up. Currently we don't look past
   // block boundaries, and the blocks aren't ordered so the block visitation
@@ -90,12 +111,17 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
 
       if (Insert->getOpcode() == TargetOpcode::PHI)
         break;
 
+      // Don't nest anything inside an inline asm, because we don't have
+      // constraints for $push inputs.
+      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+        break;
+
       // Iterate through the inputs in reverse order, since we'll be pulling
       // operands off the stack in FIFO order.
       bool AnyStackified = false;
      for (MachineOperand &Op : reverse(Insert->uses())) {
         // We're only interested in explicit virtual register operands.
-        if (!Op.isReg() || Op.isImplicit())
+        if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
           continue;
         unsigned Reg = Op.getReg();
@@ -112,6 +138,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
           continue;
 
+        // Don't nest an INLINE_ASM def into anything, because we don't have
+        // constraints for $pop outputs.
+        if (Def->getOpcode() == TargetOpcode::INLINEASM)
+          continue;
+
+        // Don't nest PHIs inside of anything.
+        if (Def->getOpcode() == TargetOpcode::PHI)
+          continue;
+
         // Argument instructions represent live-in registers and not real
         // instructions.
         if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
@@ -124,8 +159,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         // they be trivially clonable.
         // TODO: Eventually we'll relax this, to take advantage of set_local
         // returning its result.
-        bool OneUse = MRI.hasOneUse(Reg);
-        if (!OneUse && !Def->isMoveImmediate())
+        if (!MRI.hasOneUse(Reg))
           continue;
 
         // For now, be conservative and don't look across block boundaries,
@@ -134,35 +168,19 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         if (Def->getParent() != &MBB && !Def->isMoveImmediate())
           continue;
 
-        // For now, be simple and don't reorder loads, stores, or side effects.
-        // TODO: Be more aggressive.
-        if ((Def->mayLoad() || Def->mayStore() ||
-             Def->hasUnmodeledSideEffects()))
+        // Don't move instructions that have side effects or memory dependencies
+        // or other complications.
+        if (!IsSafeToMove(Def, Insert, AA))
           continue;
 
         Changed = true;
         AnyStackified = true;
-        if (OneUse) {
-          // Move the def down and nest it in the current instruction.
-          MBB.insert(MachineBasicBlock::instr_iterator(Insert),
-                     Def->removeFromParent());
-          MFI.stackifyVReg(Reg);
-          ImposeStackOrdering(Def);
-          Insert = Def;
-        } else {
-          // Clone the def down and nest it in the current instruction.
-          MachineInstr *Clone = MF.CloneMachineInstr(Def);
-          unsigned OldReg = Def->getOperand(0).getReg();
-          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
-          assert(Op.getReg() == OldReg);
-          assert(Clone->getOperand(0).getReg() == OldReg);
-          Op.setReg(NewReg);
-          Clone->getOperand(0).setReg(NewReg);
-          MBB.insert(MachineBasicBlock::instr_iterator(Insert), Clone);
-          MFI.stackifyVReg(Reg);
-          ImposeStackOrdering(Clone);
-          Insert = Clone;
-        }
+        // Move the def down and nest it in the current instruction.
+        MBB.insert(MachineBasicBlock::instr_iterator(Insert),
+                   Def->removeFromParent());
+        MFI.stackifyVReg(Reg);
+        ImposeStackOrdering(Def);
+        Insert = Def;
       }
       if (AnyStackified)
         ImposeStackOrdering(&MI);
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
new file mode 100644
index 00000000000..d0735b84de6
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -0,0 +1,102 @@
+//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an optimization pass using store result values.
+///
+/// WebAssembly's store instructions return the stored value, specifically to
+/// enable the optimization of reducing get_local/set_local traffic, which is
+/// what we're doing here.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-store-results"
+
+namespace {
+class WebAssemblyStoreResults final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "WebAssembly Store Results";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyStoreResults::ID = 0;
+FunctionPass *llvm::createWebAssemblyStoreResults() {
+  return new WebAssemblyStoreResults();
+}
+
+bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Store Results **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64:
+        unsigned ToReg = MI.getOperand(0).getReg();
+        unsigned FromReg = MI.getOperand(2).getReg();
+        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+          MachineOperand &O = *I++;
+          MachineInstr *Where = O.getParent();
+          if (Where->getOpcode() == TargetOpcode::PHI)
+            Where = Where->getOperand(&O - &Where->getOperand(0) + 1)
+                        .getMBB()
+                        ->getFirstTerminator();
+          if (&MI == Where || !MDT.dominates(&MI, Where))
+            continue;
+          O.setReg(ToReg);
+        }
+      }
+
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index d60c41a2c87..493e4be18dc 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -143,6 +143,9 @@ void WebAssemblyPassConfig::addIRPasses() {
   // control specifically what gets lowered.
   addPass(createAtomicExpandPass(TM));
 
+  // Optimize "returned" function attributes.
+  addPass(createWebAssemblyOptimizeReturned());
+
   TargetPassConfig::addIRPasses();
 }
 
@@ -157,6 +160,9 @@ bool WebAssemblyPassConfig::addInstSelector() {
 bool WebAssemblyPassConfig::addILPOpts() { return true; }
 
 void WebAssemblyPassConfig::addPreRegAlloc() {
+  // Prepare store instructions for register stackifying.
+  addPass(createWebAssemblyStoreResults());
+
   // Mark registers as representing wasm's expression stack.
   addPass(createWebAssemblyRegStackify());
 }
@@ -183,4 +189,5 @@ void WebAssemblyPassConfig::addPreSched2() {}
 void WebAssemblyPassConfig::addPreEmitPass() {
   addPass(createWebAssemblyCFGStackify());
   addPass(createWebAssemblyRegNumbering());
+  addPass(createWebAssemblyPeephole());
 }
diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll
index 4f912f32924..4a53f6e4bb9 100644
--- a/test/CodeGen/WebAssembly/cfg-stackify.ll
+++ b/test/CodeGen/WebAssembly/cfg-stackify.ll
@@ -174,7 +174,7 @@ exit:
 
 ; CHECK-LABEL: single_block:
 ; CHECK-NOT: br
-; CHECK: return ${{[0-9]+}}{{$}}
+; CHECK: return $pop{{[0-9]+}}{{$}}
 define i32 @single_block(i32* %p) {
 entry:
   store volatile i32 0, i32* %p
diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll
index 4fd1b083591..818c454a491 100644
--- a/test/CodeGen/WebAssembly/global.ll
+++ b/test/CodeGen/WebAssembly/global.ll
@@ -11,8 +11,8 @@ target triple = "wasm32-unknown-unknown"
 
 ; CHECK: foo:
 ; CHECK: i32.const $push0=, answer{{$}}
-; CHECK-NEXT: i32.load $0=, $pop0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK-NEXT: i32.load $push1=, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
 define i32 @foo() {
   %a = load i32, i32* @answer
   ret i32 %a
diff --git a/test/CodeGen/WebAssembly/load-ext.ll b/test/CodeGen/WebAssembly/load-ext.ll
index 8630baa2f3d..bdccfff1d16 100644
--- a/test/CodeGen/WebAssembly/load-ext.ll
+++ b/test/CodeGen/WebAssembly/load-ext.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
 ; CHECK-LABEL: sext_i8_i32:
-; CHECK: i32.load8_s $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load8_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i32 @sext_i8_i32(i8 *%p) {
   %v = load i8, i8* %p
   %e = sext i8 %v to i32
@@ -15,8 +15,8 @@ define i32 @sext_i8_i32(i8 *%p) {
 }
 
 ; CHECK-LABEL: zext_i8_i32:
-; CHECK: i32.load8_u $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load8_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i32 @zext_i8_i32(i8 *%p) {
   %v = load i8, i8* %p
   %e = zext i8 %v to i32
@@ -24,8 +24,8 @@ define i32 @zext_i8_i32(i8 *%p) {
 }
 
 ; CHECK-LABEL: sext_i16_i32:
-; CHECK: i32.load16_s $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load16_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i32 @sext_i16_i32(i16 *%p) {
   %v = load i16, i16* %p
   %e = sext i16 %v to i32
@@ -33,8 +33,8 @@ define i32 @sext_i16_i32(i16 *%p) {
 }
 
 ; CHECK-LABEL: zext_i16_i32:
-; CHECK: i32.load16_u $0=, $0{{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK: i32.load16_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i32 @zext_i16_i32(i16 *%p) {
   %v = load i16, i16* %p
   %e = zext i16 %v to i32
@@ -42,8 +42,8 @@ define i32 @zext_i16_i32(i16 *%p) {
 }
 
 ; CHECK-LABEL: sext_i8_i64:
-; CHECK: i64.load8_s $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load8_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i64 @sext_i8_i64(i8 *%p) {
   %v = load i8, i8* %p
   %e = sext i8 %v to i64
@@ -51,8 +51,8 @@ define i64 @sext_i8_i64(i8 *%p) {
 }
 
 ; CHECK-LABEL: zext_i8_i64:
-; CHECK: i64.load8_u $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load8_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i64 @zext_i8_i64(i8 *%p) {
   %v = load i8, i8* %p
   %e = zext i8 %v to i64
@@ -60,8 +60,8 @@ define i64 @zext_i8_i64(i8 *%p) {
 }
 
 ; CHECK-LABEL: sext_i16_i64:
-; CHECK: i64.load16_s $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load16_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i64 @sext_i16_i64(i16 *%p) {
   %v = load i16, i16* %p
   %e = sext i16 %v to i64
@@ -69,8 +69,8 @@ define i64 @sext_i16_i64(i16 *%p) {
 }
 
 ; CHECK-LABEL: zext_i16_i64:
-; CHECK: i64.load16_u $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load16_u $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i64 @zext_i16_i64(i16 *%p) {
   %v = load i16, i16* %p
   %e = zext i16 %v to i64
@@ -78,8 +78,8 @@ define i64 @zext_i16_i64(i16 *%p) {
 }
 
 ; CHECK-LABEL: sext_i32_i64:
-; CHECK: i64.load32_s $1=, $0{{$}}
-; CHECK-NEXT: return $1{{$}}
+; CHECK: i64.load32_s $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i64 @sext_i32_i64(i32 *%p) {
   %v = load i32, i32* %p
   %e = sext i32 %v to i64
@@ -87,8 +87,8 @@ define i64 @sext_i32_i64(i32 *%p) {
 }
 
 ; CHECK-LABEL: zext_i32_i64:
-; CHECK: i64.load32_u $1=, $0{{$}}
-; CHECK: return $1{{$}}
+; CHECK: i64.load32_u $push0=, $0{{$}}
+; CHECK: return $pop0{{$}}
 define i64 @zext_i32_i64(i32 *%p) {
   %v = load i32, i32* %p
   %e = zext i32 %v to i64
diff --git a/test/CodeGen/WebAssembly/load-store-i1.ll b/test/CodeGen/WebAssembly/load-store-i1.ll
index baaff019a5e..33d3aeecc58 100644
--- a/test/CodeGen/WebAssembly/load-store-i1.ll
+++ b/test/CodeGen/WebAssembly/load-store-i1.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
 ; CHECK-LABEL: load_u_i1_i32:
-; CHECK: i32.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM0]]{{$}}
+; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM0]]{{$}}
 define i32 @load_u_i1_i32(i1* %p) {
   %v = load i1, i1* %p
   %e = zext i1 %v to i32
@@ -15,9 +15,9 @@ define i32 @load_u_i1_i32(i1* %p) {
 }
 
 ; CHECK-LABEL: load_s_i1_i32:
-; CHECK: i32.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: i32.const $[[NUM1:[0-9]+]]=, 31{{$}}
-; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $[[NUM0]], $[[NUM1]]{{$}}
+; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
 ; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
 ; CHECK-NEXT: return $pop[[NUM3]]{{$}}
 define i32 @load_s_i1_i32(i1* %p) {
@@ -27,8 +27,8 @@ define i32 @load_s_i1_i32(i1* %p) {
 }
 
 ; CHECK-LABEL: load_u_i1_i64:
-; CHECK: i64.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM0]]{{$}}
+; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM0]]{{$}}
 define i64 @load_u_i1_i64(i1* %p) {
   %v = load i1, i1* %p
   %e = zext i1 %v to i64
@@ -36,9 +36,9 @@ define i64 @load_u_i1_i64(i1* %p) {
 }
 
 ; CHECK-LABEL: load_s_i1_i64:
-; CHECK: i64.load8_u $[[NUM0:[0-9]+]]=, $0{{$}}
+; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: i64.const $[[NUM1:[0-9]+]]=, 63{{$}}
-; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $[[NUM0]], $[[NUM1]]{{$}}
+; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
 ; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
 ; CHECK-NEXT: return $pop[[NUM3]]{{$}}
 define i64 @load_s_i1_i64(i1* %p) {
diff --git a/test/CodeGen/WebAssembly/load.ll b/test/CodeGen/WebAssembly/load.ll
index 707897b9da5..1017167d522 100644
--- a/test/CodeGen/WebAssembly/load.ll
+++ b/test/CodeGen/WebAssembly/load.ll
@@ -8,8 +8,8 @@ target triple = "wasm32-unknown-unknown"
 ; CHECK-LABEL: ldi32:
 ; CHECK-NEXT: .param i32{{$}}
 ; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: i32.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
 define i32 @ldi32(i32 *%p) {
   %v = load i32, i32* %p
   ret i32 %v
@@ -18,9 +18,8 @@ define i32 @ldi32(i32 *%p) {
 ; CHECK-LABEL: ldi64:
 ; CHECK-NEXT: .param i32{{$}}
 ; CHECK-NEXT: .result i64{{$}}
-; CHECK-NEXT: .local i64{{$}}
-; CHECK-NEXT: i64.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
 define i64 @ldi64(i64 *%p) {
   %v = load i64, i64* %p
   ret i64 %v
@@ -29,9 +28,8 @@ define i64 @ldi64(i64 *%p) {
 ; CHECK-LABEL: ldf32:
 ; CHECK-NEXT: .param i32{{$}}
 ; CHECK-NEXT: .result f32{{$}}
-; CHECK-NEXT: .local f32{{$}}
-; CHECK-NEXT: f32.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: f32.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
 define float @ldf32(float *%p) {
   %v = load float, float* %p
   ret float %v
@@ -40,9 +38,8 @@ define float @ldf32(float *%p) {
 ; CHECK-LABEL: ldf64:
 ; CHECK-NEXT: .param i32{{$}}
 ; CHECK-NEXT: .result f64{{$}}
-; CHECK-NEXT: .local f64{{$}}
-; CHECK-NEXT: f64.load $[[NUM:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $[[NUM]]{{$}}
+; CHECK-NEXT: f64.load $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
 define double @ldf64(double *%p) {
   %v = load double, double* %p
   ret double %v
diff --git a/test/CodeGen/WebAssembly/memory-addr32.ll b/test/CodeGen/WebAssembly/memory-addr32.ll
index 674131b2bce..4b3aba92245 100644
--- a/test/CodeGen/WebAssembly/memory-addr32.ll
+++ b/test/CodeGen/WebAssembly/memory-addr32.ll
@@ -10,9 +10,8 @@ declare void @llvm.wasm.grow.memory.i32(i32) nounwind
 
 ; CHECK-LABEL: memory_size:
 ; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: .local i32{{$}}
-; CHECK-NEXT: memory_size $0={{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK-NEXT: memory_size $push0={{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i32 @memory_size() {
   %a = call i32 @llvm.wasm.memory.size.i32()
   ret i32 %a
diff --git a/test/CodeGen/WebAssembly/memory-addr64.ll b/test/CodeGen/WebAssembly/memory-addr64.ll
index bb3fc271994..a44c5f044e1 100644
--- a/test/CodeGen/WebAssembly/memory-addr64.ll
+++ b/test/CodeGen/WebAssembly/memory-addr64.ll
@@ -10,9 +10,8 @@ declare void @llvm.wasm.grow.memory.i64(i64) nounwind
 
 ; CHECK-LABEL: memory_size:
 ; CHECK-NEXT: .result i64{{$}}
-; CHECK-NEXT: .local i64{{$}}
-; CHECK-NEXT: memory_size $0={{$}}
-; CHECK-NEXT: return $0{{$}}
+; CHECK-NEXT: memory_size $push0={{$}}
+; CHECK-NEXT: return $pop0{{$}}
 define i64 @memory_size() {
   %a = call i64 @llvm.wasm.memory.size.i64()
   ret i64 %a
diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll
new file mode 100644
index 00000000000..f3000aab70a
--- /dev/null
+++ b/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test the register stackifier pass.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; No because of pointer aliasing.
+
+; CHECK-LABEL: no0:
+; CHECK: return $1{{$}}
+define i32 @no0(i32* %p, i32* %q) {
+  %t = load i32, i32* %q
+  store i32 0, i32* %p
+  ret i32 %t
+}
+
+; No because of side effects.
+
+; CHECK-LABEL: no1:
+; CHECK: return $1{{$}}
+define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
+  %t = load volatile i32, i32* %q, !invariant.load !0
+  store volatile i32 0, i32* %p
+  ret i32 %t
+}
+
+; Yes because of invariant load and no side effects.
+
+; CHECK-LABEL: yes0:
+; CHECK: return $pop0{{$}}
+define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
+  %t = load i32, i32* %q, !invariant.load !0
+  store i32 0, i32* %p
+  ret i32 %t
+}
+
+; Yes because of no intervening side effects.
+
+; CHECK-LABEL: yes1:
+; CHECK: return $pop0{{$}}
+define i32 @yes1(i32* %q) {
+  %t = load volatile i32, i32* %q
+  ret i32 %t
+}
+
+!0 = !{}
diff --git a/test/CodeGen/WebAssembly/returned.ll b/test/CodeGen/WebAssembly/returned.ll
new file mode 100644
index 00000000000..d65e2a8bc3e
--- /dev/null
+++ b/test/CodeGen/WebAssembly/returned.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that the "returned" attribute is optimized effectively.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: _Z3foov:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, 1{{$}}
+; CHECK-NEXT: call $push1=, _Znwm, $pop0{{$}}
+; CHECK-NEXT: call $push2=, _ZN5AppleC1Ev, $pop1{{$}}
+; CHECK-NEXT: return $pop2{{$}}
+%class.Apple = type { i8 }
+declare noalias i8* @_Znwm(i32)
+declare %class.Apple* @_ZN5AppleC1Ev(%class.Apple* returned)
+define %class.Apple* @_Z3foov() {
+entry:
+  %call = tail call noalias i8* @_Znwm(i32 1)
+  %0 = bitcast i8* %call to %class.Apple*
+  %call1 = tail call %class.Apple* @_ZN5AppleC1Ev(%class.Apple* %0)
+  ret %class.Apple* %0
+}
+
+; CHECK-LABEL: _Z3barPvS_l:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: call $push0=, memcpy, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare i8* @memcpy(i8* returned, i8*, i32)
+define i8* @_Z3barPvS_l(i8* %p, i8* %s, i32 %n) {
+entry:
+  %call = tail call i8* @memcpy(i8* %p, i8* %s, i32 %n)
+  ret i8* %p
+}
-- 
2.34.1
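[Editor's note, not part of the original patch: at the IR level, the new OptimizeReturned pass rewrites uses of a call argument that are dominated by a call carrying the "returned" attribute on that argument, so later code reads the call's result instead. For the _Z3foov test above, the effect is roughly:

    ; before the pass
    %call1 = tail call %class.Apple* @_ZN5AppleC1Ev(%class.Apple* %0)
    ret %class.Apple* %0

    ; after the pass - the ret now uses the call result
    %call1 = tail call %class.Apple* @_ZN5AppleC1Ev(%class.Apple* %0)
    ret %class.Apple* %call1

which is what lets the two calls in the CHECK lines chain as $push1/$pop1/$push2 with no local in between.]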