From f3296cdf519636bde7e4c6fbb77f10f4e3875c19 Mon Sep 17 00:00:00 2001 From: Manuel Jacob Date: Sat, 12 Dec 2015 21:33:31 +0000 Subject: [PATCH] Partially fix memcpy / memset / memmove lowering in SelectionDAG construction if address space != 0. Summary: Previously SelectionDAGBuilder asserted that the pointer operands of memcpy / memset / memmove intrinsics are in address space < 256. This assert implicitly assumed the X86 backend, where all address spaces < 256 are equivalent to address space 0 from the code generator's point of view. On some targets (R600 and NVPTX) several address spaces < 256 have a target-defined meaning, so this assert made little sense for these targets. This patch removes this wrong assertion and adds extra checks before lowering these intrinsics to library calls. If a pointer operand can't be cast to address space 0 without changing semantics, a fatal error is reported to the user. The new behavior should be valid for all targets that give address spaces != 0 a target-specified meaning (NVPTX, R600, X86). NVPTX lowers big or variable-sized memory intrinsics before SelectionDAG construction. All other memory intrinsics are inlined (the threshold is set very high for this target). R600 doesn't support memcpy / memset / memmove library calls (previously the illegal emission of a call to such a library function triggered an error somewhere in the code generator). X86 now emits inline loads and stores for address spaces 256 and 257 up to the same threshold that is used for address space 0 and reports a fatal error otherwise. I call this a "partial fix" because there are still cases that can't be lowered. A fatal error is reported in these cases. 
Reviewers: arsenm, theraven, compnerd, hfinkel Subscribers: hfinkel, llvm-commits, alex Differential Revision: http://reviews.llvm.org/D7241 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255441 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 +++++++++++++++ .../SelectionDAG/SelectionDAGBuilder.cpp | 22 ------------------- test/CodeGen/X86/memcpy.ll | 13 +++++++++++ 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4596b8eba1a..abbc48e10e4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4557,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands are in address space 0 or can be cast to it as a no-op. + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, @@ -4598,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, true, DstPtrInfo, SrcPtrInfo); } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc // memcpy is not guaranteed to be safe. 
libc memcpys aren't required to // respect volatile, so they may do things like read or write memory @@ -4659,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4716,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + // Emit a library call. Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 506115c7856..77b52c0f2e3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4374,14 +4374,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. 
- assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4398,12 +4390,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4418,14 +4404,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. 
- assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll index 00669443d6d..4351014192b 100644 --- a/test/CodeGen/X86/memcpy.ll +++ b/test/CodeGen/X86/memcpy.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* nocapture, i8 addrspace(256)* nocapture, i64, i32, i1) nounwind ; Variable memcpy's should lower to calls. @@ -138,3 +139,15 @@ define void @PR15348(i8* %a, i8* %b) { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false) ret void } + +; Memcpys from / to address space 256 should be lowered to appropriate loads / +; stores if small enough. +define void @addrspace256(i8 addrspace(256)* %a, i8 addrspace(256)* %b) nounwind { + tail call void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* %a, i8 addrspace(256)* %b, i64 16, i32 8, i1 false) + ret void +; LINUX-LABEL: addrspace256: +; LINUX: movq %gs: +; LINUX: movq %gs: +; LINUX: movq {{.*}}, %gs: +; LINUX: movq {{.*}}, %gs: +} -- 2.34.1