lib/Target/ARM/ARMSelectionDAGInfo.cpp

   1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the ARMSelectionDAGInfo class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARMTargetMachine.h"
  15 #include "llvm/CodeGen/SelectionDAG.h"
  16 #include "llvm/IR/DerivedTypes.h"
  17 using namespace llvm;
  18
  19 #define DEBUG_TYPE "arm-selectiondag-info"
  20
  21 // Emit, if possible, a specialized version of the given Libcall. Typically this
  22 // means selecting the appropriately aligned version, but we also convert memset
  23 // of 0 into memclr.
  24 SDValue ARMSelectionDAGInfo::
  25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
  26                        SDValue Chain,
  27                        SDValue Dst, SDValue Src,
  28                        SDValue Size, unsigned Align,
  29                        RTLIB::Libcall LC) const {
  30   const ARMSubtarget &Subtarget =
  31       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  32   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
  33
  34   // Only use a specialized AEABI function if the default version of this
  35   // Libcall is an AEABI function.
  36   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
  37     return SDValue();
  38
  39   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
  40   // able to translate memset to memclr and use the value to index the function
  41   // name array.
  42   enum {
  43     AEABI_MEMCPY = 0,
  44     AEABI_MEMMOVE,
  45     AEABI_MEMSET,
  46     AEABI_MEMCLR
  47   } AEABILibcall;
  48   switch (LC) {
  49   case RTLIB::MEMCPY:
  50     AEABILibcall = AEABI_MEMCPY;
  51     break;
  52   case RTLIB::MEMMOVE:
  53     AEABILibcall = AEABI_MEMMOVE;
  54     break;
  55   case RTLIB::MEMSET:
  56     AEABILibcall = AEABI_MEMSET;
  57     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
  58       if (ConstantSrc->getZExtValue() == 0)
  59         AEABILibcall = AEABI_MEMCLR;
  60     break;
  61   default:
  62     return SDValue();
  63   }
  64
  65   // Choose the most-aligned libcall variant that we can
  66   enum {
  67     ALIGN1 = 0,
  68     ALIGN4,
  69     ALIGN8
  70   } AlignVariant;
  71   if ((Align & 7) == 0)
  72     AlignVariant = ALIGN8;
  73   else if ((Align & 3) == 0)
  74     AlignVariant = ALIGN4;
  75   else
  76     AlignVariant = ALIGN1;
  77
  78   TargetLowering::ArgListTy Args;
  79   TargetLowering::ArgListEntry Entry;
  80   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  81   Entry.Node = Dst;
  82   Args.push_back(Entry);
  83   if (AEABILibcall == AEABI_MEMCLR) {
  84     Entry.Node = Size;
  85     Args.push_back(Entry);
  86   } else if (AEABILibcall == AEABI_MEMSET) {
  87     // Adjust parameters for memset, EABI uses format (ptr, size, value),
  88     // GNU library uses (ptr, value, size)
  89     // See RTABI section 4.3.4
  90     Entry.Node = Size;
  91     Args.push_back(Entry);
  92
  93     // Extend or truncate the argument to be an i32 value for the call.
  94     if (Src.getValueType().bitsGT(MVT::i32))
  95       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
  96     else if (Src.getValueType().bitsLT(MVT::i32))
  97       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
  98
  99     Entry.Node = Src;
 100     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
 101     Entry.isSExt = false;
 102     Args.push_back(Entry);
 103   } else {
 104     Entry.Node = Src;
 105     Args.push_back(Entry);
 106
 107     Entry.Node = Size;
 108     Args.push_back(Entry);
 109   }
 110
 111   char const *FunctionNames[4][3] = {
 112     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
 113     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
 114     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
 115     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
 116   };
 117   TargetLowering::CallLoweringInfo CLI(DAG);
 118   CLI.setDebugLoc(dl)
 119       .setChain(Chain)
 120       .setCallee(
 121            TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
 122            DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
 123                                  TLI->getPointerTy(DAG.getDataLayout())),
 124            std::move(Args), 0)
 125       .setDiscardResult();
 126   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
 127
 128   return CallResult.second;
 129 }
 130
 131 SDValue
 132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
 133                                              SDValue Chain,
 134                                              SDValue Dst, SDValue Src,
 135                                              SDValue Size, unsigned Align,
 136                                              bool isVolatile, bool AlwaysInline,
 137                                              MachinePointerInfo DstPtrInfo,
 138                                           MachinePointerInfo SrcPtrInfo) const {
 139   const ARMSubtarget &Subtarget =
 140       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
 141   // Do repeated 4-byte loads and stores. To be improved.
 142   // This requires 4-byte alignment.
 143   if ((Align & 3) != 0)
 144     return SDValue();
 145   // This requires the copy size to be a constant, preferably
 146   // within a subtarget-specific limit.
 147   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
 148   if (!ConstantSize)
 149     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 150                                   RTLIB::MEMCPY);
 151   uint64_t SizeVal = ConstantSize->getZExtValue();
 152   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
 153     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 154                                   RTLIB::MEMCPY);
 155
 156   unsigned BytesLeft = SizeVal & 3;
 157   unsigned NumMemOps = SizeVal >> 2;
 158   unsigned EmittedNumMemOps = 0;
 159   EVT VT = MVT::i32;
 160   unsigned VTSize = 4;
 161   unsigned i = 0;
 162   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
 163   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
 164   SDValue TFOps[6];
 165   SDValue Loads[6];
 166   uint64_t SrcOff = 0, DstOff = 0;
 167
 168   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
 169   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
 170   // pressure on the general purpose registers. However this seems harder to map
 171   // onto the register allocator's view of the world.
 172
 173   // The number of MEMCPY pseudo-instructions to emit. We use up to
 174   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
 175   // later on. This is a lower bound on the number of MEMCPY operations we must
 176   // emit.
 177   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
 178
 179   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
 180
 181   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
 182     // Evenly distribute registers among MEMCPY operations to reduce register
 183     // pressure.
 184     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
 185     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
 186
 187     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
 188                       DAG.getConstant(NumRegs, dl, MVT::i32));
 189     Src = Dst.getValue(1);
 190     Chain = Dst.getValue(2);
 191
 192     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
 193     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
 194
 195     EmittedNumMemOps = NextEmittedNumMemOps;
 196   }
 197
 198   if (BytesLeft == 0)
 199     return Chain;
 200
 201   // Issue loads / stores for the trailing (1 - 3) bytes.
 202   unsigned BytesLeftSave = BytesLeft;
 203   i = 0;
 204   while (BytesLeft) {
 205     if (BytesLeft >= 2) {
 206       VT = MVT::i16;
 207       VTSize = 2;
 208     } else {
 209       VT = MVT::i8;
 210       VTSize = 1;
 211     }
 212
 213     Loads[i] = DAG.getLoad(VT, dl, Chain,
 214                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
 215                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
 216                            SrcPtrInfo.getWithOffset(SrcOff),
 217                            false, false, false, 0);
 218     TFOps[i] = Loads[i].getValue(1);
 219     ++i;
 220     SrcOff += VTSize;
 221     BytesLeft -= VTSize;
 222   }
 223   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 224                       makeArrayRef(TFOps, i));
 225
 226   i = 0;
 227   BytesLeft = BytesLeftSave;
 228   while (BytesLeft) {
 229     if (BytesLeft >= 2) {
 230       VT = MVT::i16;
 231       VTSize = 2;
 232     } else {
 233       VT = MVT::i8;
 234       VTSize = 1;
 235     }
 236
 237     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
 238                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
 239                                         DAG.getConstant(DstOff, dl, MVT::i32)),
 240                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
 241     ++i;
 242     DstOff += VTSize;
 243     BytesLeft -= VTSize;
 244   }
 245   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 246                      makeArrayRef(TFOps, i));
 247 }
 248
 249
 250 SDValue ARMSelectionDAGInfo::
 251 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
 252                          SDValue Chain,
 253                          SDValue Dst, SDValue Src,
 254                          SDValue Size, unsigned Align,
 255                          bool isVolatile,
 256                          MachinePointerInfo DstPtrInfo,
 257                          MachinePointerInfo SrcPtrInfo) const {
 258   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 259                                 RTLIB::MEMMOVE);
 260 }
 261
 262
 263 SDValue ARMSelectionDAGInfo::
 264 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
 265                         SDValue Chain, SDValue Dst,
 266                         SDValue Src, SDValue Size,
 267                         unsigned Align, bool isVolatile,
 268                         MachinePointerInfo DstPtrInfo) const {
 269   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
 270                                 RTLIB::MEMSET);
 271 }