X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FSystemZ%2FSystemZSelectionDAGInfo.cpp;h=178aa381731127cf4c4997033d9ae7fc912685f7;hb=a6aa0c3bcc9cc1c2d0c8cc3b59ce50fa8948e2dc;hp=789594b36c6025be9c05017ab4b43b140c427262;hpb=e1b2af731e2a45344a7c502232f66c55cd746da0;p=oota-llvm.git diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 789594b36c6..178aa381731 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -11,18 +11,39 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-selectiondag-info" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; -SystemZSelectionDAGInfo:: -SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} +#define DEBUG_TYPE "systemz-selectiondag-info" -SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { +// Decide whether it is best to use a loop or straight-line code for +// a block operation of Size bytes with source address Src and destination +// address Dest. Sequence is the opcode to use for straight-line code +// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). +// Return the chain for the completed operation. +static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, + unsigned Loop, SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size) { + EVT PtrVT = Src.getValueType(); + // The heuristic we use is to prefer loops for anything that would + // require 7 or more MVCs. With these kinds of sizes there isn't + // much to choose between straight-line code and looping code, + // since the time will be dominated by the MVCs themselves. + // However, the loop has 4 or 5 instructions (depending on whether + // the base addresses can be proved equal), so there doesn't seem + // much point using a loop for 5 * 256 bytes or fewer. Anything in + // the range (5 * 256, 6 * 256) will need another instruction after + // the loop, so it doesn't seem worth using a loop then either. + // The next value up, 6 * 256, can be implemented in the same + // number of straight-line MVCs as 6 * 256 - 1. + if (Size > 6 * 256) + return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, DL, PtrVT), + DAG.getConstant(Size / 256, DL, PtrVT)); + return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, DL, PtrVT)); } SDValue SystemZSelectionDAGInfo:: @@ -34,14 +55,9 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, if (IsVolatile) return SDValue(); - if (ConstantSDNode *CSize = dyn_cast(Size)) { - uint64_t Bytes = CSize->getZExtValue(); - if (Bytes >= 1 && Bytes <= 0x100) { - // A single MVC. - return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, - Chain, Dst, Src, Size); - } - } + if (auto *CSize = dyn_cast(Size)) + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, Dst, Src, CSize->getZExtValue()); return SDValue(); } @@ -56,7 +72,8 @@ static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, for (unsigned I = 1; I < Size; ++I) StoreVal |= ByteVal << (I * 8); return DAG.getStore(Chain, DL, - DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), + DAG.getConstant(StoreVal, DL, + MVT::getIntegerVT(Size * 8)), Dst, DstPtrInfo, false, false, Align); } @@ -65,23 +82,23 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, MachinePointerInfo DstPtrInfo) const { - EVT DstVT = Dst.getValueType(); + EVT PtrVT = Dst.getValueType(); if (IsVolatile) return SDValue(); - if (ConstantSDNode *CSize = dyn_cast(Size)) { + if (auto *CSize = dyn_cast(Size)) { uint64_t Bytes = CSize->getZExtValue(); if (Bytes == 0) return SDValue(); - if (ConstantSDNode *CByte = dyn_cast(Byte)) { + if (auto *CByte = dyn_cast(Byte)) { // Handle cases that can be done using at most two of // MVI, MVHI, MVHHI and MVGHI. The latter two can only be // used if ByteVal is all zeros or all ones; in other casees, // we can move at most 2 halfwords. uint64_t ByteVal = CByte->getZExtValue(); if (ByteVal == 0 || ByteVal == 255 ? - Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : + Bytes <= 16 && countPopulation(Bytes) <= 2 : Bytes <= 4) { unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); unsigned Size2 = Bytes - Size1; @@ -89,8 +106,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Align, DstPtrInfo); if (Size2 == 0) return Chain1; - Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst, - DAG.getConstant(Size1, DstVT)); + Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(Size1, DL, PtrVT)); DstPtrInfo = DstPtrInfo.getWithOffset(Size1); SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, std::min(Align, Size1), DstPtrInfo); @@ -103,8 +120,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, false, false, Align); if (Bytes == 1) return Chain1; - SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, - DAG.getConstant(1, DstVT)); + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, DL, PtrVT)); SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1), false, false, 1); @@ -112,20 +129,47 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } } assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); - if (Bytes <= 0x101) { - // Copy the byte to the first location and then use MVC to copy - // it to the rest. - Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, - false, false, Align); - SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, - DAG.getConstant(1, DstVT)); - return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst, - DAG.getConstant(Bytes - 1, MVT::i32)); - } + + // Handle the special case of a memset of 0, which can use XC. + auto *CByte = dyn_cast(Byte); + if (CByte && CByte->getZExtValue() == 0) + return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, + Chain, Dst, Dst, Bytes); + + // Copy the byte to the first location and then use MVC to copy + // it to the rest. + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, DL, PtrVT)); + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, DstPlus1, Dst, Bytes - 1); } return SDValue(); } +// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), +// deciding whether to use a loop or straight-line code. +static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, uint64_t Size) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + EVT PtrVT = Src1.getValueType(); + // A two-CLC sequence is a clear win over a loop, not least because it + // needs only one branch. A three-CLC sequence needs the same number + // of branches as a loop (i.e. 2), but is shorter. That brings us to + // lengths greater than 768 bytes. It seems relatively likely that + // a difference will be found within the first 768 bytes, so we just + // optimize for the smallest number of branch instructions, in order + // to avoid polluting the prediction buffer too much. A loop only ever + // needs 2 branches, whereas a straight-line sequence would need 3 or more. + if (Size > 3 * 256) + return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, DL, PtrVT), + DAG.getConstant(Size / 256, DL, PtrVT)); + return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, DL, PtrVT)); +} + // Convert the current CC value into an integer that is 0 if CC == 0, // less than zero if CC == 1 and greater than zero if CC >= 2. // The sequence starts with IPM, which puts CC into bits 29 and 28 @@ -133,9 +177,9 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(28, MVT::i32)); + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, - DAG.getConstant(31, MVT::i32)); + DAG.getConstant(31, DL, MVT::i32)); return ROTL; } @@ -144,20 +188,55 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const { - if (ConstantSDNode *CSize = dyn_cast(Size)) { + if (auto *CSize = dyn_cast(Size)) { uint64_t Bytes = CSize->getZExtValue(); - if (Bytes >= 1 && Bytes <= 0x100) { - // A single CLC. - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, - Src1, Src2, Size); - SDValue Glue = Chain.getValue(1); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); - } + assert(Bytes > 0 && "Caller should have handled 0-size case"); + Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + SDValue Glue = Chain.getValue(1); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } return std::make_pair(SDValue(), SDValue()); } +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + // Use SRST to find the character. End is its address on success. + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); + Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); + Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, + DAG.getConstant(255, DL, MVT::i32)); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, Char); + Chain = End.getValue(1); + SDValue Glue = End.getValue(2); + + // Now select between End and null, depending on whether the character + // was found. + SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), + DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), + Glue}; + VTs = DAG.getVTList(PtrVT, MVT::Glue); + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); + return std::make_pair(End, Chain); +} + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { + SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); + SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, + DAG.getConstant(0, DL, MVT::i32)); + return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); +} + std::pair SystemZSelectionDAGInfo:: EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, @@ -165,8 +244,42 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, MachinePointerInfo Op2PtrInfo) const { SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); Chain = Unused.getValue(1); SDValue Glue = Chain.getValue(2); return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } + +// Search from Src for a null character, stopping once Src reaches Limit. +// Return a pair of values, the first being the number of nonnull characters +// and the second being the out chain. +// +// This can be used for strlen by setting Limit to 0. +static std::pair getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Src, + SDValue Limit) { + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, DAG.getConstant(0, DL, MVT::i32)); + Chain = End.getValue(1); + SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); + return std::make_pair(Len, Chain); +} + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); +} + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); + return getBoundedStrlen(DAG, DL, Chain, Src, Limit); +}