Make TargetLowering::getPointerTy() taking DataLayout as an argument
[oota-llvm.git] / lib / Target / ARM / ARMSelectionDAGInfo.cpp
index 2b9202bff01ca2d1aa7f2ebd6e06bdd747318ebe..fc561a9d941e0322770c86c1a2b92ce66dd1039f 100644 (file)
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "arm-selectiondag-info"
 #include "ARMTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
 using namespace llvm;
 
-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
-  : TargetSelectionDAGInfo(TM),
-    Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
-}
+#define DEBUG_TYPE "arm-selectiondag-info"
+
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
+    : TargetSelectionDAGInfo(&DL) {}
 
 ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
 }
 
+// Emit, if possible, a specialized version of the given Libcall. Typically this
+// means selecting the appropriately aligned version, but we also convert memset
+// of 0 into memclr.
+SDValue ARMSelectionDAGInfo::
+EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
+                       SDValue Chain,
+                       SDValue Dst, SDValue Src,
+                       SDValue Size, unsigned Align,
+                       RTLIB::Libcall LC) const {
+  const ARMSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
+  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
+
+  // Only use a specialized AEABI function if the default version of this
+  // Libcall is an AEABI function.
+  if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
+    return SDValue();
+
+  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
+  // able to translate memset to memclr and use the value to index the function
+  // name array.
+  enum {
+    AEABI_MEMCPY = 0,
+    AEABI_MEMMOVE,
+    AEABI_MEMSET,
+    AEABI_MEMCLR
+  } AEABILibcall;
+  switch (LC) {
+  case RTLIB::MEMCPY:
+    AEABILibcall = AEABI_MEMCPY;
+    break;
+  case RTLIB::MEMMOVE:
+    AEABILibcall = AEABI_MEMMOVE;
+    break;
+  case RTLIB::MEMSET: 
+    AEABILibcall = AEABI_MEMSET;
+    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
+      if (ConstantSrc->getZExtValue() == 0)
+        AEABILibcall = AEABI_MEMCLR;
+    break;
+  default:
+    return SDValue();
+  }
+
+  // Choose the most-aligned libcall variant that we can
+  enum {
+    ALIGN1 = 0,
+    ALIGN4,
+    ALIGN8
+  } AlignVariant;
+  if ((Align & 7) == 0)
+    AlignVariant = ALIGN8;
+  else if ((Align & 3) == 0)
+    AlignVariant = ALIGN4;
+  else
+    AlignVariant = ALIGN1;
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
+  Entry.Node = Dst;
+  Args.push_back(Entry);
+  if (AEABILibcall == AEABI_MEMCLR) {
+    Entry.Node = Size;
+    Args.push_back(Entry);
+  } else if (AEABILibcall == AEABI_MEMSET) {
+    // Adjust parameters for memset, EABI uses format (ptr, size, value),
+    // GNU library uses (ptr, value, size)
+    // See RTABI section 4.3.4
+    Entry.Node = Size;
+    Args.push_back(Entry);
+
+    // Extend or truncate the argument to be an i32 value for the call.
+    if (Src.getValueType().bitsGT(MVT::i32))
+      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+    else if (Src.getValueType().bitsLT(MVT::i32))
+      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+    Entry.Node = Src; 
+    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+    Entry.isSExt = false;
+    Args.push_back(Entry);
+  } else {
+    Entry.Node = Src;
+    Args.push_back(Entry);
+    
+    Entry.Node = Size;
+    Args.push_back(Entry);
+  }
+
+  char const *FunctionNames[4][3] = {
+    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
+    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
+    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
+    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
+  };
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setCallee(
+           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+                                 TLI->getPointerTy(DAG.getDataLayout())),
+           std::move(Args), 0)
+      .setDiscardResult();
+  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+  
+  return CallResult.second;
+}
+
 SDValue
-ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
                                              SDValue Chain,
                                              SDValue Dst, SDValue Src,
                                              SDValue Size, unsigned Align,
                                              bool isVolatile, bool AlwaysInline,
                                              MachinePointerInfo DstPtrInfo,
                                           MachinePointerInfo SrcPtrInfo) const {
+  const ARMSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
   // Do repeated 4-byte loads and stores. To be improved.
   // This requires 4-byte alignment.
   if ((Align & 3) != 0)
     return SDValue();
-  // This requires the copy size to be a constant, preferrably
+  // This requires the copy size to be a constant, preferably
   // within a subtarget-specific limit.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
   if (!ConstantSize)
-    return SDValue();
+    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+                                  RTLIB::MEMCPY);
   uint64_t SizeVal = ConstantSize->getZExtValue();
-  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
-    return SDValue();
+  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
+    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+                                  RTLIB::MEMCPY);
 
   unsigned BytesLeft = SizeVal & 3;
   unsigned NumMemOps = SizeVal >> 2;
@@ -50,9 +165,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
   EVT VT = MVT::i32;
   unsigned VTSize = 4;
   unsigned i = 0;
-  const unsigned MAX_LOADS_IN_LDM = 6;
-  SDValue TFOps[MAX_LOADS_IN_LDM];
-  SDValue Loads[MAX_LOADS_IN_LDM];
+  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
+  const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
+  SDValue TFOps[6];
+  SDValue Loads[6];
   uint64_t SrcOff = 0, DstOff = 0;
 
   // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
@@ -63,24 +179,26 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
       Loads[i] = DAG.getLoad(VT, dl, Chain,
                              DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
-                                         DAG.getConstant(SrcOff, MVT::i32)),
+                                         DAG.getConstant(SrcOff, dl, MVT::i32)),
                              SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
-                             false, 0);
+                             false, false, 0);
       TFOps[i] = Loads[i].getValue(1);
       SrcOff += VTSize;
     }
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        makeArrayRef(TFOps, i));
 
     for (i = 0;
          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
       TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                               DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
-                                          DAG.getConstant(DstOff, MVT::i32)),
+                                          DAG.getConstant(DstOff, dl, MVT::i32)),
                               DstPtrInfo.getWithOffset(DstOff),
                               isVolatile, false, 0);
       DstOff += VTSize;
     }
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        makeArrayRef(TFOps, i));
 
     EmittedNumMemOps += i;
   }
@@ -102,14 +220,16 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
 
     Loads[i] = DAG.getLoad(VT, dl, Chain,
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
-                                       DAG.getConstant(SrcOff, MVT::i32)),
-                           SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
+                           SrcPtrInfo.getWithOffset(SrcOff),
+                           false, false, false, 0);
     TFOps[i] = Loads[i].getValue(1);
     ++i;
     SrcOff += VTSize;
     BytesLeft -= VTSize;
   }
-  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                      makeArrayRef(TFOps, i));
 
   i = 0;
   BytesLeft = BytesLeftSave;
@@ -124,11 +244,36 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
 
     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
-                                        DAG.getConstant(DstOff, MVT::i32)),
+                                        DAG.getConstant(DstOff, dl, MVT::i32)),
                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
     ++i;
     DstOff += VTSize;
     BytesLeft -= VTSize;
   }
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     makeArrayRef(TFOps, i));
+}
+
+
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
+                         SDValue Chain,
+                         SDValue Dst, SDValue Src,
+                         SDValue Size, unsigned Align,
+                         bool isVolatile,
+                         MachinePointerInfo DstPtrInfo,
+                         MachinePointerInfo SrcPtrInfo) const {
+  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+                                RTLIB::MEMMOVE);
+}
+
+
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
+                        SDValue Chain, SDValue Dst,
+                        SDValue Src, SDValue Size,
+                        unsigned Align, bool isVolatile,
+                        MachinePointerInfo DstPtrInfo) const {
+  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+                                RTLIB::MEMSET);
 }