From 2dd264c8a3337523cccd2e7192721821678367b7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 27 Jul 2014 17:46:40 +0000 Subject: [PATCH] Add alignment value to allowsUnalignedMemoryAccess Rename to allowsMisalignedMemoryAccess. On R600, 8 and 16 byte accesses are mostly OK with 4-byte alignment, and don't need to be split into multiple accesses. Vector loads with an alignment of the element type are not uncommon in OpenCL code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214055 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 7 ++++--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 20 +++++++++++-------- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++-- .../SelectionDAG/SelectionDAGBuilder.cpp | 5 +++-- lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.h | 7 ++++--- lib/Target/ARM/ARMISelLowering.cpp | 11 ++++++---- lib/Target/ARM/ARMISelLowering.h | 7 ++++--- lib/Target/ARM/ARMSubtarget.h | 2 +- lib/Target/Mips/Mips16ISelLowering.cpp | 7 ++++--- lib/Target/Mips/Mips16ISelLowering.h | 5 +++-- lib/Target/Mips/MipsSEISelLowering.cpp | 7 ++++--- lib/Target/Mips/MipsSEISelLowering.h | 5 +++-- lib/Target/PowerPC/PPCISelLowering.cpp | 7 ++++--- lib/Target/PowerPC/PPCISelLowering.h | 7 ++++--- lib/Target/R600/SIISelLowering.cpp | 18 +++++++++-------- lib/Target/R600/SIISelLowering.h | 5 +++-- lib/Target/SystemZ/SystemZISelLowering.cpp | 7 ++++--- lib/Target/SystemZ/SystemZISelLowering.h | 5 +++-- lib/Target/X86/X86ISelLowering.cpp | 7 ++++--- lib/Target/X86/X86ISelLowering.h | 4 ++-- lib/Target/XCore/XCoreISelLowering.cpp | 12 ++++++++--- test/CodeGen/R600/unaligned-load-store.ll | 17 ++++++++++++++++ 23 files changed, 112 insertions(+), 66 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 30b3982ede2..7f75fec7609 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -782,9 +782,10 @@ public: /// copy/move/set is converted to a sequence of store operations. Its use /// helps to ensure that such replacements don't generate code that causes an /// alignment error (trap) on the target machine. - virtual bool allowsUnalignedMemoryAccesses(EVT, - unsigned AddrSpace = 0, - bool * /*Fast*/ = nullptr) const { + virtual bool allowsMisalignedMemoryAccesses(EVT, + unsigned AddrSpace = 0, + unsigned Align = 1, + bool * /*Fast*/ = nullptr) const { return false; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0275990dcdf..c36e1753766 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -724,10 +724,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // If this is an unaligned store and the target doesn't support it, // expand it. 
unsigned AS = ST->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + unsigned Align = ST->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) + if (Align < ABIAlignment) ExpandUnalignedStore(cast(Node), DAG, TLI, this); } @@ -835,12 +836,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) + if (Align < ABIAlignment) ExpandUnalignedStore(cast(Node), DAG, TLI, this); } break; @@ -886,13 +888,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ + if (Align < ABIAlignment){ ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); } } @@ -1077,12 +1080,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // it, expand it. 
EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { + unsigned Align = LD->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ + if (Align < ABIAlignment){ ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5a2200440d7..6426ee57eb5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3810,7 +3810,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, if (VT == MVT::Other) { unsigned AS = 0; if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || - TLI.allowsUnalignedMemoryAccesses(VT, AS)) { + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3870,7 +3870,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4ccdb1dbc77..e3d74da42f3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5725,9 +5725,10 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. + // TODO: Check alignment of src and dest ptrs. if (!TLI->isTypeLegal(LoadVT) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) + !TLI->allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI->allowsMisalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 994013f79be..2c677ab5260 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6229,7 +6229,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat) && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::f128, 0, &Fast) && Fast))) + (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; return Size >= 8 ? MVT::i64 : MVT::i32; diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 96b1d48697d..d7020f3d30a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -212,10 +212,11 @@ public: MVT getScalarShiftAmountTy(EVT LHSTy) const override; - /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. 
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, - bool *Fast = nullptr) const override { + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, + unsigned Align = 1, + bool *Fast = nullptr) const override { if (RequireStrictAlign) return false; // FIXME: True for Cyclone, but not necessary others. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bcb8f46999e..49ed02277e2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9696,8 +9696,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, - bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -9751,11 +9753,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 8f8986bd779..80bd718edc9 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -266,11 +266,12 @@ namespace llvm { bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; - /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. Returns whether it /// is "fast" by reference in the second argument. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Align, + bool *Fast) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f8283b08d48..be0e8d2b218 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -188,7 +188,7 @@ protected: /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see - /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). + /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). 
bool AllowsUnalignedMem; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 587925df946..9f07c6c75f3 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -157,9 +157,10 @@ llvm::createMips16TargetLowering(MipsTargetMachine &TM, } bool -Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { return false; } diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h index e7e4d7f651d..aa153901881 100644 --- a/lib/Target/Mips/Mips16ISelLowering.h +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -22,8 +22,9 @@ namespace llvm { explicit Mips16TargetLowering(MipsTargetMachine &TM, const MipsSubtarget &STI); - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Align, + bool *Fast) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ef70c0b49b6..16b414e8856 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -329,9 +329,10 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { } bool -MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; if (Subtarget.systemSupportsUnalignedAccess()) { diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 00d86834be0..a252291a1fa 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -31,8 +31,9 @@ namespace llvm { void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0, + unsigned Align = 1, + bool *Fast = nullptr) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d63fac17306..537b80fa8fb 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9214,9 +9214,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } -bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { if (DisablePPCUnaligned) return false; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 9497ae8f99a..b8b917e07b7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -494,9 +494,10 @@ namespace llvm { /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. 
- bool allowsUnalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align = 1, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 577d1af3c23..698f66fc43d 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -240,15 +240,13 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : // TargetLowering queries //===----------------------------------------------------------------------===// -bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - bool *IsFast) const { +bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align, + bool *IsFast) const { if (IsFast) *IsFast = false; - // XXX: This depends on the address space and also we may want to revist - // the alignment values we specify in the DataLayout. - // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, // which isn't a simple VT. if (!VT.isSimple() || VT == MVT::Other) @@ -261,8 +259,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, // XXX - The only mention I see of this in the ISA manual is for LDS direct // reads the "byte address and must be dword aligned". Is it also true for the // normal loads and stores? - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) - return false; + if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) { + // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte + // aligned, 8 byte access in a single operation using ds_read2/write2_b32 + // with adjacent offsets. + return Align % 4 == 0; + } // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the // byte-address are ignored, thus forcing Dword alignment. diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index db276329919..d03bc864148 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -59,8 +59,9 @@ class SITargetLowering : public AMDGPUTargetLowering { public: SITargetLowering(TargetMachine &tm); - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, - bool *IsFast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, + unsigned Align, + bool *IsFast) const override; TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 00c65f5bba6..dcb122cc84d 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -339,9 +339,10 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } -bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. 
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index e21b0501933..c8f5e4fa793 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -208,8 +208,9 @@ public: bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, - bool *Fast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, + unsigned Align, + bool *Fast) const override; bool isTruncateFree(Type *, Type *) const override; bool isTruncateFree(EVT, EVT) const override; const char *getTargetNodeName(unsigned Opcode) const override; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 38a6ba0d91f..6ae818abe86 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1775,9 +1775,10 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const { } bool -X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { if (Fast) *Fast = Subtarget->isUnalignedMemAccessFast(); return true; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 32941766158..f0e4cf8280b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -565,10 +565,10 @@ namespace llvm { /// legal as the hook is used before type legalization. bool isSafeMemOpType(MVT VT) const override; - /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. Returns whether it /// is "fast" by reference in the second argument. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override; /// LowerOperation - Provide custom lowering hooks for some operations. diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 492784d44d5..780e971daf2 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -426,7 +426,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type"); assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); - if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) + if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(), + LD->getAddressSpace(), + LD->getAlignment())) return SDValue(); unsigned ABIAlignment = getDataLayout()-> @@ -504,7 +506,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const StoreSDNode *ST = cast(Op); assert(!ST->isTruncatingStore() && "Unexpected store type"); assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); - if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(), + ST->getAddressSpace(), + ST->getAlignment())) { return SDValue(); } unsigned ABIAlignment = getDataLayout()-> @@ -1803,7 +1807,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // Replace unaligned store of unaligned load with memmove. 
    StoreSDNode *ST = cast<StoreSDNode>(N);
    if (!DCI.isBeforeLegalize() ||
-        allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+        allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+                                       ST->getAddressSpace(),
+                                       ST->getAlignment()) ||
        ST->isVolatile() || ST->isIndexed()) {
      break;
    }
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 0ba109b670a..76ea97cd94a 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -31,3 +31,20 @@ define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> ad
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
 }
+
+; FIXME: This should use ds_read2_b32
+; SI-LABEL: @load_lds_i64_align_4
+; SI: DS_READ_B64
+; SI: S_ENDPGM
+define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+  %val = load i64 addrspace(3)* %in, align 4
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FIXME: Need to fix this case.
+; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+;   %val = load i64 addrspace(3)* %in, align 1
+;   store i64 %val, i64 addrspace(1)* %out, align 8
+;   ret void
+; }
--
2.34.1
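For reference, a minimal sketch of how a backend could implement the new hook, in the spirit of the SI change above: accept 4-byte-aligned 8-byte accesses in a local (LDS-like) address space because they can be emitted as two adjacent dword operations instead of being split by the legalizer. The class name MyTargetLowering, the address space number 3, and the fallback natural-alignment policy are assumptions for illustration only, not part of the patch; the class is presumed to be declared elsewhere as a TargetLowering subclass.

#include "llvm/Target/TargetLowering.h"

using namespace llvm;

// Sketch only: MyTargetLowering stands in for a concrete backend's
// TargetLowering subclass, declared elsewhere. Address space 3 stands in
// for an LDS-like local memory space.
bool MyTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  // Only reason about simple, known types; anything else keeps the
  // conservative default.
  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  // In the local address space, an 8-byte access with 4-byte alignment is
  // acceptable: it can be lowered as two adjacent 4-byte operations rather
  // than being expanded by the legalizer.
  if (AddrSpace == 3 && VT.getStoreSize() == 8)
    return Align % 4 == 0;

  // Assumed fallback policy for this sketch: everything else still
  // requires natural alignment.
  return Align >= VT.getStoreSize();
}

Callers are expected to pass the access's actual alignment from the MemSDNode, as the LegalizeDAG and XCore changes in this patch do, e.g. TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), LD->getAddressSpace(), LD->getAlignment()); leaving the default Align = 1 preserves the old "fully unaligned" query behavior of allowsUnalignedMemoryAccesses.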