[SystemZ] Use MVC for memcpy
author Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Mon, 8 Jul 2013 09:35:23 +0000 (09:35 +0000)
committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Mon, 8 Jul 2013 09:35:23 +0000 (09:35 +0000)
Use MVC for memcpy in cases where a single MVC is enough.  Using MVC is
a win for longer copies too, but I'll leave that for later.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185802 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/SystemZ/CMakeLists.txt
lib/Target/SystemZ/SystemZISelLowering.cpp
lib/Target/SystemZ/SystemZISelLowering.h
lib/Target/SystemZ/SystemZInstrInfo.td
lib/Target/SystemZ/SystemZOperands.td
lib/Target/SystemZ/SystemZOperators.td
lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp [new file with mode: 0644]
lib/Target/SystemZ/SystemZSelectionDAGInfo.h [new file with mode: 0644]
lib/Target/SystemZ/SystemZTargetMachine.h
test/CodeGen/SystemZ/memcpy-01.ll [new file with mode: 0644]

index edb679dabfdd9cb4b9c9fee1a5ed202ddc5c56fa..04bbec5127e2e50a67e789b1a270d6eb0563cce3 100644 (file)
@@ -22,6 +22,7 @@ add_llvm_target(SystemZCodeGen
   SystemZLongBranch.cpp
   SystemZMCInstLower.cpp
   SystemZRegisterInfo.cpp
+  SystemZSelectionDAGInfo.cpp
   SystemZSubtarget.cpp
   SystemZTargetMachine.cpp
   )
index 256c27829d7f2bd3a7499056265852e3a2982d4b..b49e6a0e2178912522b60a6db60b3228827a528b 100644 (file)
@@ -241,6 +241,12 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
+
+  // We want to use MVC in preference to even a single load/store pair.
+  MaxStoresPerMemcpy = 0;
+  MaxStoresPerMemcpyOptSize = 0;
+  MaxStoresPerMemmove = 0;
+  MaxStoresPerMemmoveOptSize = 0;
 }
 
 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
@@ -1579,6 +1585,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(SDIVREM64);
     OPCODE(UDIVREM32);
     OPCODE(UDIVREM64);
+    OPCODE(MVC);
     OPCODE(ATOMIC_SWAPW);
     OPCODE(ATOMIC_LOADW_ADD);
     OPCODE(ATOMIC_LOADW_SUB);
@@ -2143,6 +2150,26 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
   return MBB;
 }
 
+// Replace the MVCWrapper pseudo MI with a real MVC instruction.  The new
+// instruction is inserted into MBB immediately before MI, after which the
+// pseudo is erased.  Returns MBB, since no new blocks are created.
+MachineBasicBlock *
+SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
+                                      MachineBasicBlock *MBB) const {
+  // Pseudo operand layout: destination base and displacement, source base
+  // and displacement, then the length.  The base operands are copied by
+  // value so that they do not refer to MI's own storage.
+  MachineOperand DstBaseOp = MI->getOperand(0);
+  uint64_t       DstOffset = MI->getOperand(1).getImm();
+  MachineOperand SrcBaseOp = MI->getOperand(2);
+  uint64_t       SrcOffset = MI->getOperand(3).getImm();
+  uint64_t       NumBytes  = MI->getOperand(4).getImm();
+
+  const SystemZInstrInfo *InstrInfo = TM.getInstrInfo();
+  DebugLoc Loc = MI->getDebugLoc();
+
+  // In the real MVC the length sits between the destination address and
+  // the source address.
+  MachineInstrBuilder MVC =
+    BuildMI(*MBB, MI, Loc, InstrInfo->get(SystemZ::MVC));
+  MVC.addOperand(DstBaseOp);
+  MVC.addImm(DstOffset);
+  MVC.addImm(NumBytes);
+  MVC.addOperand(SrcBaseOp);
+  MVC.addImm(SrcOffset);
+
+  MI->eraseFromParent();
+  return MBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::
 EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
   switch (MI->getOpcode()) {
@@ -2376,6 +2403,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
                                    MI->getOperand(1).getMBB()))
       MI->eraseFromParent();
     return MBB;
+  case SystemZ::MVCWrapper:
+    return emitMVCWrapper(MI, MBB);
   default:
     llvm_unreachable("Unexpected instr type to insert");
   }
index 21b4d7245023adeaac797480920019e5eac4a506..4ddfcbbda0531f252aab230423ca4c62d9306ec6 100644 (file)
@@ -73,6 +73,13 @@ namespace SystemZISD {
     UDIVREM32,
     UDIVREM64,
 
+    // Use MVC to copy bytes from one memory location to another.
+    // The first operand is the target address, the second operand is the
+    // source address, and the third operand is the constant length.
+    // This isn't a memory opcode because we'd need to attach two
+    // MachineMemOperands rather than one.
+    MVC,
+
     // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
     // ATOMIC_LOAD_<op>.
     //
@@ -221,6 +228,8 @@ private:
                                           unsigned BitSize) const;
   MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
                                         MachineBasicBlock *BB) const;
+  MachineBasicBlock *emitMVCWrapper(MachineInstr *MI,
+                                    MachineBasicBlock *BB) const;
 };
 } // end namespace llvm
 
index 6b74220a6efbf6aaed17ddc46bad96b41529b5ef..b4e5c2583bf4aa17fe45dc308353e670f6ac1b66 100644 (file)
@@ -288,6 +288,12 @@ let mayLoad = 1, mayStore = 1 in
                                       bdaddr12only:$BD2),
                    "mvc\t$BDL1, $BD2", []>;
 
+// Pseudo instruction that selects a z_mvc node: a destination address, a
+// source address and a constant length accepted by imm32len8.  It is marked
+// usesCustomInserter and is turned into a real MVC by
+// SystemZTargetLowering::emitMVCWrapper.
+let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in
+  def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+                                       imm32len8:$length),
+                          [(z_mvc bdaddr12only:$dest, bdaddr12only:$src,
+                                  imm32len8:$length)]>;
+
 //===----------------------------------------------------------------------===//
 // Sign extensions
 //===----------------------------------------------------------------------===//
index 620876e7cbbcc7896c9c3bd8ed6554fa6c219201..9d7943922807c104761a8af2bba2eeb29c2b6e35 100644 (file)
@@ -219,6 +219,11 @@ def uimm8    : Immediate<i8, [{}], UIMM8, "U8Imm">;
 // i32 immediates
 //===----------------------------------------------------------------------===//
 
+// Immediates for 8-bit lengths.  The predicate tests the length minus one,
+// so the accepted values are 1 to 256 inclusive; a length of zero wraps
+// around in the unsigned subtraction and is rejected.
+def imm32len8 : Immediate<i32, [{
+  return isUInt<8>(N->getZExtValue() - 1);
+}], NOOP_SDNodeXForm, "U32Imm">;
+
 // Immediates for the lower and upper 16 bits of an i32, with the other
 // bits of the i32 being zero.
 def imm32ll16 : Immediate<i32, [{
index a84af7a806718053f3f63f5f283413781e5ec9b2..693f3a1e166732f8bc804adc060423be07e32f20 100644 (file)
@@ -52,6 +52,10 @@ def SDT_ZAtomicCmpSwapW     : SDTypeProfile<1, 6,
                                              SDTCisVT<4, i32>,
                                              SDTCisVT<5, i32>,
                                              SDTCisVT<6, i32>]>;
+// Type profile for a memory-to-memory copy: no results and three operands,
+// namely two pointers followed by an i32.
+def SDT_ZCopy               : SDTypeProfile<0, 3,
+                                            [SDTCisPtrTy<0>,
+                                             SDTCisPtrTy<1>,
+                                             SDTCisVT<2, i32>]>;
 
 //===----------------------------------------------------------------------===//
 // Node definitions
@@ -103,6 +107,9 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
 def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
 def z_atomic_cmp_swapw  : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
 
+// DAG node for SystemZISD::MVC.  It carries a chain and may both load and
+// store; its operands are described by SDT_ZCopy.
+def z_mvc               : SDNode<"SystemZISD::MVC", SDT_ZCopy,
+                                 [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+
 //===----------------------------------------------------------------------===//
 // Pattern fragments
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644 (file)
index 0000000..d2da9d2
--- /dev/null
@@ -0,0 +1,46 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+// Construct the SystemZ SelectionDAG info object.  The class adds no data
+// members, so the only work is initializing the TargetSelectionDAGInfo base
+// from TM.
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(
+    const SystemZTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {}
+
+// Nothing to release: the class declares no members of its own.
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {}
+
+// Target hook for lowering memcpy.  A non-volatile copy whose length is a
+// compile-time constant between 1 and 256 bytes inclusive is lowered to a
+// single SystemZISD::MVC node chained after Chain; the new chain is
+// returned.  In every other case (volatile copies, non-constant lengths,
+// a zero length, or more than 256 bytes) a null SDValue is returned so that
+// the copy is left to the generic lowering code.
+//
+// Align, AlwaysInline, DstPtrInfo and SrcPtrInfo are currently unused.
+SDValue SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+                        bool IsVolatile, bool AlwaysInline,
+                        MachinePointerInfo DstPtrInfo,
+                        MachinePointerInfo SrcPtrInfo) const {
+  if (IsVolatile)
+    return SDValue();
+
+  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+    uint64_t Bytes = CSize->getZExtValue();
+    if (Bytes >= 1 && Bytes <= 0x100) {
+      // A single MVC.  The length operand of SystemZISD::MVC is declared
+      // as i32 (SDT_ZCopy), whereas Size has whatever integer type the
+      // memcpy call used (i32 or i64), so rebuild the constant as an i32
+      // rather than forwarding Size unchanged.
+      return DAG.getNode(SystemZISD::MVC, DL, MVT::Other,
+                         Chain, Dst, Src,
+                         DAG.getConstant(Bytes, MVT::i32));
+    }
+  }
+  return SDValue();
+}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644 (file)
index 0000000..39c1491
--- /dev/null
@@ -0,0 +1,40 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+// SystemZ-specific SelectionDAG hooks.  The class adds no data members; it
+// only overrides the memcpy lowering hook of TargetSelectionDAGInfo.
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+  explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+  ~SystemZSelectionDAGInfo();
+
+  // Target-specific lowering of a memcpy of Size bytes from Src to Dst.
+  // Overrides the hook declared in TargetSelectionDAGInfo; the definition
+  // lives in SystemZSelectionDAGInfo.cpp.
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool IsVolatile, bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const
+    LLVM_OVERRIDE;
+};
+
+}
+
+#endif
index 98614e7b7e2ce4ad06b73a10341efa590c789da9..a99a98e08477cedc37a40c9f188bdd94b6b6f42a 100644 (file)
 #include "SystemZInstrInfo.h"
 #include "SystemZRegisterInfo.h"
 #include "SystemZSubtarget.h"
+#include "SystemZSelectionDAGInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
 
 namespace llvm {
 
@@ -32,7 +32,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
   const DataLayout        DL;
   SystemZInstrInfo        InstrInfo;
   SystemZTargetLowering   TLInfo;
-  TargetSelectionDAGInfo  TSInfo;
+  SystemZSelectionDAGInfo TSInfo;
   SystemZFrameLowering    FrameLowering;
 
 public:
diff --git a/test/CodeGen/SystemZ/memcpy-01.ll b/test/CodeGen/SystemZ/memcpy-01.ll
new file mode 100644 (file)
index 0000000..2985b03
--- /dev/null
@@ -0,0 +1,82 @@
+; Test memcpy using MVC.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind
+
+; A zero-length copy with an i32 length: neither pointer argument should
+; be referenced before the return.
+define void @f1(i8 *%dest, i8 *%src) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 0, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; A zero-length copy with an i64 length: neither pointer argument should
+; be referenced before the return.
+define void @f2(i8 *%dest, i8 *%src) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 0, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; A 1-byte copy with an i32 length should use a single one-byte MVC.
+define void @f3(i8 *%dest, i8 *%src) {
+; CHECK: f3:
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 1, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; A 1-byte copy with an i64 length should use a single one-byte MVC.
+define void @f4(i8 *%dest, i8 *%src) {
+; CHECK: f4:
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; A 256-byte copy with an i32 length is the largest that should use a
+; single MVC.
+define void @f5(i8 *%dest, i8 *%src) {
+; CHECK: f5:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 256, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; A 256-byte copy with an i64 length is the largest that should use a
+; single MVC.
+define void @f6(i8 *%dest, i8 *%src) {
+; CHECK: f6:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 256, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; 257 bytes is too big for a single MVC.  For now expect none, so that
+; the test fails and gets updated when large copies are implemented.
+; This variant uses an i32 length.
+define void @f7(i8 *%dest, i8 *%src) {
+; CHECK: f7:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; A 257-byte copy with an i64 length: one byte too many for a single MVC,
+; so no MVC is expected for now.
+define void @f8(i8 *%dest, i8 *%src) {
+; CHECK: f8:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 257, i32 1,
+                                       i1 false)
+  ret void
+}