[XCore] Target optimized library function __memcpy_4()

author Richard Osborne <richard@xmos.com>

Thu, 27 Feb 2014 13:39:07 +0000 (13:39 +0000)

committer Richard Osborne <richard@xmos.com>

Thu, 27 Feb 2014 13:39:07 +0000 (13:39 +0000)
author Richard Osborne <richard@xmos.com>
Thu, 27 Feb 2014 13:39:07 +0000 (13:39 +0000)
committer Richard Osborne <richard@xmos.com>
Thu, 27 Feb 2014 13:39:07 +0000 (13:39 +0000)
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp

index 44aeb6057ccb6ec977b68459d91bbe25182d4e83..68ede6ae6d9efd3b71e67533526753f0062b9716 100644 (file)
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -21,3 +21,36 @@ XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
  
  XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
  }
+
+SDValue XCoreSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
+                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+                        bool isVolatile, bool AlwaysInline,
+                        MachinePointerInfo DstPtrInfo,
+                        MachinePointerInfo SrcPtrInfo) const
+{ // Emit a __memcpy_4 call if possible, else return a null SDValue.
+  unsigned SizeBitWidth = Size.getValueType().getSizeInBits();
+  // Call __memcpy_4 if Align and Size are multiples of 4 and !AlwaysInline.
+  if (!AlwaysInline && (Align & 3) == 0 &&
+      DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { // low 2 bits 0
+    const TargetLowering &TLI = *DAG.getTarget().getTargetLowering();
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry; // one entry reused for all three args
+    Entry.Ty = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
+    Entry.Node = Dst; Args.push_back(Entry); // arg 0: destination
+    Entry.Node = Src; Args.push_back(Entry); // arg 1: source
+    Entry.Node = Size; Args.push_back(Entry); // arg 2: byte count
+
+    TargetLowering::CallLoweringInfo
+    CLI(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
+        0, TLI.getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false,
+        /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+        DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), Args, DAG, dl);
+    std::pair<SDValue,SDValue> CallResult =
+      TLI.LowerCallTo(CLI);
+    return CallResult.second; // .second is the call's output chain
+  }
+
+  // Otherwise have the target-independent code call memcpy.
+  return SDValue();
+}
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h

index 0386968638bd5b09dc7d393be0d7c3ab2dad4c9c..31704f388a9f9457dee1f11c2bdd7a97c1ab67d9 100644 (file)
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -24,6 +24,15 @@ class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
  public:
    explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
    ~XCoreSelectionDAGInfo();
+
+  virtual SDValue // A null SDValue means: let generic code call memcpy.
+  EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
+                          SDValue Chain,
+                          SDValue Op1, SDValue Op2, // Op1..3 = Dst, Src, Size
+                          SDValue Op3, unsigned Align, bool isVolatile,
+                          bool AlwaysInline,
+                          MachinePointerInfo DstPtrInfo,
+                          MachinePointerInfo SrcPtrInfo) const;
  };
  
  }
diff --git a/test/CodeGen/XCore/byVal.ll b/test/CodeGen/XCore/byVal.ll

index e9612fd6021abb53dc559eac16bb52429818b22f..df6c6d351d18851147e8a82d9901cc05cde6ca1e 100644 (file)
--- a/test/CodeGen/XCore/byVal.ll
+++ b/test/CodeGen/XCore/byVal.ll
@@ -20,7 +20,7 @@ entry:
  ; CHECK: ldaw r5, sp[1]
  ; CHECK: ldc r2, 40
  ; CHECK: mov r0, r5
-; CHECK: bl memcpy
+; CHECK: bl __memcpy_4
  ; CHECK: mov r0, r5
  ; CHECK: bl f1
  ; CHECK: mov r0, r4
diff --git a/test/CodeGen/XCore/memcpy.ll b/test/CodeGen/XCore/memcpy.ll

new file mode 100644 (file)

index 0000000..fe424c5
--- /dev/null
+++ b/test/CodeGen/XCore/memcpy.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; Optimize memcpy to __memcpy_4 if src, dst and size are all 4 byte aligned.
+define void @f1(i8* %dst, i8* %src, i32 %n) nounwind {
+; CHECK-LABEL: f1:
+; CHECK: bl __memcpy_4
+entry:
+  %0 = shl i32 %n, 2 ; size = n << 2, so its low two bits are zero
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %0, i32 4, i1 false)
+  ret void
+}
+
+; Can't optimize - size is not a multiple of 4.
+define void @f2(i8* %dst, i8* %src, i32 %n) nounwind {
+; CHECK-LABEL: f2:
+; CHECK: bl memcpy
+entry: ; size below is the raw %n, so its low two bits are unknown
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %n, i32 4, i1 false)
+  ret void
+}
+
+; Can't optimize - alignment is not a multiple of 4.
+define void @f3(i8* %dst, i8* %src, i32 %n) nounwind {
+; CHECK-LABEL: f3:
+; CHECK: bl memcpy
+entry:
+  %0 = shl i32 %n, 2 ; size is n << 2 as in f1; only the align operand (2) differs
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %0, i32 2, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
author	Richard Osborne <richard@xmos.com>
	Thu, 27 Feb 2014 13:39:07 +0000 (13:39 +0000)
committer	Richard Osborne <richard@xmos.com>
	Thu, 27 Feb 2014 13:39:07 +0000 (13:39 +0000)
lib/Target/XCore/XCoreSelectionDAGInfo.cpp		patch \| blob \| history
lib/Target/XCore/XCoreSelectionDAGInfo.h		patch \| blob \| history
test/CodeGen/XCore/byVal.ll		patch \| blob \| history
test/CodeGen/XCore/memcpy.ll	[new file with mode: 0644]	patch \| blob