From 3768c07818ad882f8bcc97e0fb35e358fd799b27 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 15 Dec 2015 22:01:29 +0000 Subject: [PATCH] [WebAssembly] Implement instruction selection for constant offsets in addresses. Add instruction patterns for matching load and store instructions with constant offsets in addresses. The code is fairly redundant due to the need to replicate everything between imm, tglobaladdr, and texternalsym, but this appears to be common tablegen practice. The main alternative appears to be to introduce matching functions with C++ code, but sticking with purely generated matchers seems better for now. Also note that this doesn't yet support offsets from getelementptr, which will be the most common case; that will depend on a change in target-independent code in order to set the NoUnsignedWrap flag, which I'll submit separately. Until then, the testcase uses ptrtoint+add+inttoptr with a nuw on the add. Also implement isLegalAddressingMode with an approximation of this. Differential Revision: http://reviews.llvm.org/D15538 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255681 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/WebAssembly/README.txt | 5 - .../WebAssembly/WebAssemblyISelLowering.cpp | 27 +- .../WebAssembly/WebAssemblyISelLowering.h | 2 + .../WebAssembly/WebAssemblyInstrMemory.td | 375 +++++++++++++++++- test/CodeGen/WebAssembly/global.ll | 4 +- test/CodeGen/WebAssembly/offset.ll | 185 +++++++++ test/CodeGen/WebAssembly/store-results.ll | 4 +- 7 files changed, 574 insertions(+), 28 deletions(-) create mode 100644 test/CodeGen/WebAssembly/offset.ll diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 0e7aa23ac3f..b97ea454165 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -38,11 +38,6 @@ many set_local instructions are implicit! 
//===---------------------------------------------------------------------===// -Load and store instructions can have a constant offset. We should (a) model -this, and (b) do address-mode folding with it. - -//===---------------------------------------------------------------------===// - Br, br_if, and tableswitch instructions can support having a value on the expression stack across the jump (sometimes). We should (a) model this, and (b) extend the stackifier to utilize it. diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a5ca08f12bd..597d6f4fd1d 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -261,6 +261,24 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { return true; } +bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, + Type *Ty, + unsigned AS) const { + // WebAssembly offsets are added as unsigned without wrapping. The + // isLegalAddressingMode gives us no way to determine if wrapping could be + // happening, so we approximate this by accepting only non-negative offsets. + if (AM.BaseOffs < 0) + return false; + + // WebAssembly has no scale register operands. + if (AM.Scale != 0) + return false; + + // Everything else is legal. + return true; +} + //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. 
//===----------------------------------------------------------------------===// @@ -408,7 +426,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (In.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); if (In.Flags.isInConsecutiveRegsLast()) - fail(DL, DAG, "WebAssembly hasn't implemented cons regs last return values"); + fail(DL, DAG, + "WebAssembly hasn't implemented cons regs last return values"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. Tys.push_back(In.VT); @@ -551,9 +570,9 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); - return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, - DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, - GA->getOffset())); + return DAG.getNode( + WebAssemblyISD::Wrapper, DL, VT, + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset())); } SDValue diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 370c60e24e5..e7232a042e1 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -54,6 +54,8 @@ private: StringRef Constraint, MVT VT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 0e7768eee5a..85fd1f5078b 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -14,14 
+14,20 @@ // TODO: // - HasAddr64 -// - WebAssemblyTargetLowering::isLegalAddressingMode // - WebAssemblyTargetLowering having to do with atomics -// - Each has optional alignment and immediate byte offset. +// - Each has optional alignment. // WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16 // local types. These memory-only types instead zero- or sign-extend into local // types when loading, and truncate when storing. +// WebAssembly constant offsets are performed as unsigned with infinite +// precision, so we need to check for NoUnsignedWrap so that we don't fold an +// offset for an add that needs wrapping. +def regPlusImm : PatFrag<(ops node:$off, node:$addr), + (add node:$addr, node:$off), + [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + let Defs = [ARGUMENTS] in { // Basic load. @@ -34,6 +40,64 @@ def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [], def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], "f64.load\t$dst, $off($addr)">; +} // Defs = [ARGUMENTS] + +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select loads with a constant offset. 
+def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I32 imm:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I64 imm:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F32 imm:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F64 imm:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I64 tglobaladdr:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F32 tglobaladdr:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I64 texternalsym:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F32 texternalsym:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F64 texternalsym:$off, $addr)>; + +// Select loads with just a constant offset. 
+def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + +let Defs = [ARGUMENTS] in { + // Extending load. def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], "i32.load8_s\t$dst, $off($addr)">; @@ -58,12 +122,6 @@ def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], } // Defs = [ARGUMENTS] -// Select loads with no constant offset. -def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; -def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; -def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; -def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; - // Select extending loads with no constant offset. 
def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; @@ -76,13 +134,189 @@ def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; -// "Don't care" extending load become zero-extending load. +// Select extending loads with a constant offset. +def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + 
(LOAD8_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select extending loads with just a constant offset. 
+def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, 
(CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + +// Resolve "don't care" extending loads to zero-extending loads. This is +// somewhat arbitrary, but zero-extending is conceptually simpler. + +// Select "don't care" extending loads with no constant offset. def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; +// Select "don't care" extending loads with a constant offset. 
+def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select "don't care" extending loads with just a constant offset. 
+def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + let Defs = [ARGUMENTS] in { // Basic store. @@ -102,17 +336,63 @@ def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [], } // Defs = [ARGUMENTS] +// Select stores with no constant offset. 
def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>; def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>; def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; -// FIXME: This pattern matches an immediate to actually use the offset field -// in the store instruction; however only unsigned offsets are supported in -// wasm, so we need to constrain the immediate we match. This may require -// custom code rather than a simple pattern. -// def : Pat<(store I32:$val, (add I32:$addr, (i32 imm:$off))), -// (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +// Select stores with a constant offset. +def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F32 imm:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F64 imm:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + 
(STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + +// Select stores with just a constant offset. +def : Pat<(store I32:$val, imm:$off), + (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, imm:$off), + (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, imm:$off), + (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, imm:$off), + (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; let Defs = [ARGUMENTS] in { @@ -130,6 +410,7 @@ def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], } // Defs = [ARGUMENTS] +// Select truncating stores with no constant offset. 
def : Pat<(truncstorei8 I32:$val, I32:$addr), (STORE8_I32 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), @@ -141,6 +422,70 @@ def : Pat<(truncstorei16 I64:$val, I32:$addr), def : Pat<(truncstorei32 I64:$val, I32:$addr), (STORE32_I64 0, I32:$addr, I64:$val)>; +// Select truncating stores with a constant offset. +def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I64 texternalsym:$off, I32:$addr, 
I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + +// Select truncating stores with just a constant offset. +def : Pat<(truncstorei8 I32:$val, imm:$off), + (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, imm:$off), + (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, imm:$off), + (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, imm:$off), + (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, imm:$off), + (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), 
+ (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + let Defs = [ARGUMENTS] in { // Memory size. diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll index c6bc359a188..32a5e5c606c 100644 --- a/test/CodeGen/WebAssembly/global.ll +++ b/test/CodeGen/WebAssembly/global.ll @@ -10,8 +10,8 @@ target triple = "wasm32-unknown-unknown" @llvm.used = appending global [1 x i32*] [i32* @g], section "llvm.metadata" ; CHECK: foo: -; CHECK: i32.const $push0=, answer{{$}} -; CHECK-NEXT: i32.load $push1=, 0($pop0){{$}} +; CHECK: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.load $push1=, answer($pop0){{$}} ; CHECK-NEXT: return $pop1{{$}} define i32 @foo() { %a = load i32, i32* @answer diff --git a/test/CodeGen/WebAssembly/offset.ll b/test/CodeGen/WebAssembly/offset.ll new file mode 100644 index 00000000000..75a0bc9ab6c --- /dev/null +++ b/test/CodeGen/WebAssembly/offset.ll @@ -0,0 +1,185 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test constant load and store address offsets. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: load_i32_with_folded_offset: +; CHECK: i32.load $push0=, 24($0){{$}} +define i32 @load_i32_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; Without nuw, and even with nsw, we can't fold an offset. 
+ +; CHECK-LABEL: load_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_folded_offset: +; CHECK: i64.load $push0=, 24($0){{$}} +define i64 @load_i64_with_folded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_folded_offset: +; CHECK: i32.store $discard=, 24($0), $pop0{{$}} +define void @store_i32_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + store i32 0, i32* %s + ret void +} + +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i32_with_unfolded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + store i32 0, i32* %s + ret void +} + +; Same as above but with store with i64. 
+ +; CHECK-LABEL: store_i64_with_folded_offset: +; CHECK: i64.store $discard=, 24($0), $pop0{{$}} +define void @store_i64_with_folded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + store i64 0, i64* %s + ret void +} + +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i64_with_unfolded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + store i64 0, i64* %s + ret void +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: load_i32_from_numeric_address: +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load $push1=, 42($pop0){{$}} +define i32 @load_i32_from_numeric_address() { + %s = inttoptr i32 42 to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; CHECK-LABEL: load_i32_from_global_address: +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load $push1=, gv($pop0){{$}} +@gv = global i32 0 +define i32 @load_i32_from_global_address() { + %t = load i32, i32* @gv + ret i32 %t +} + +; CHECK-LABEL: store_i32_to_numeric_address: +; CHECK: i32.const $0=, 0{{$}} +; CHECK: i32.store $discard=, 42($0), $0{{$}} +define void @store_i32_to_numeric_address() { + %s = inttoptr i32 42 to i32* + store i32 0, i32* %s + ret void +} + +; CHECK-LABEL: store_i32_to_global_address: +; CHECK: i32.const $0=, 0{{$}} +; CHECK: i32.store $discard=, gv($0), $0{{$}} +define void @store_i32_to_global_address() { + store i32 0, i32* @gv + ret void +} + +; Fold an offset into a sign-extending load. 
+ +; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK: i32.load8_s $push0=, 24($0){{$}} +define i32 @load_i8_s_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} + +; Fold an offset into a zero-extending load. + +; CHECK-LABEL: load_i8_u_with_folded_offset: +; CHECK: i32.load8_u $push0=, 24($0){{$}} +define i32 @load_i8_u_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load i8, i8* %s + %u = zext i8 %t to i32 + ret i32 %u +} + +; Fold an offset into a truncating store. + +; CHECK-LABEL: store_i8_with_folded_offset: +; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}} +define void @store_i8_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + store i8 0, i8* %s + ret void +} diff --git a/test/CodeGen/WebAssembly/store-results.ll b/test/CodeGen/WebAssembly/store-results.ll index 84f24e6e549..73479e544db 100644 --- a/test/CodeGen/WebAssembly/store-results.ll +++ b/test/CodeGen/WebAssembly/store-results.ll @@ -26,7 +26,7 @@ entry: @pos = global %class.Vec3 zeroinitializer, align 4 ; CHECK-LABEL: foo: -; CHECK: i32.store $discard=, 0($pop0), $0{{$}} +; CHECK: i32.store $discard=, pos($0), $0{{$}} define void @foo() { for.body.i: br label %for.body5.i @@ -44,7 +44,7 @@ for.cond.cleanup4.i: } ; CHECK-LABEL: bar: -; CHECK: i32.store $discard=, 0($0), $pop0{{$}} +; CHECK: i32.store $discard=, pos($0), $0{{$}} define void @bar() { for.body.i: br label %for.body5.i -- 2.34.1