[SystemZ] Use CLC and IPM to implement memcmp

author Richard Sandiford <rsandifo@linux.vnet.ibm.com>

Mon, 12 Aug 2013 10:28:10 +0000 (10:28 +0000)

committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>

Mon, 12 Aug 2013 10:28:10 +0000 (10:28 +0000)
author Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Mon, 12 Aug 2013 10:28:10 +0000 (10:28 +0000)
committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Mon, 12 Aug 2013 10:28:10 +0000 (10:28 +0000)
diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h

index fe2fba42dc5a9b62523bb1705ac402a959da93bf..35da1d7a73822153741aa03a16aba95550ed18a2 100644 (file)
--- a/include/llvm/Target/TargetSelectionDAGInfo.h
+++ b/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -94,6 +94,20 @@ public:
                            MachinePointerInfo DstPtrInfo) const {
      return SDValue();
    }
+
+  /// EmitTargetCodeForMemcmp - Emit target-specific code that performs a
+  /// memcmp of Op3 bytes at Op1 and Op2, in cases where that is faster than
+  /// a libcall.  The first returned SDValue is the result of the memcmp and
+  /// the second is the chain.  Both SDValues can be null if a normal libcall
+  /// should be used.
+  virtual std::pair<SDValue, SDValue>
+  EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc dl,
+                          SDValue Chain,
+                          SDValue Op1, SDValue Op2,
+                          SDValue Op3, MachinePointerInfo Op1PtrInfo,
+                          MachinePointerInfo Op2PtrInfo) const {
+    return std::make_pair(SDValue(), SDValue());
+  }
  };
  
  } // end llvm namespace
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index e23c5bea224b62da4d5375e98feda19fc1211f07..1101ee1c50b2fee92bd5adf2867cbef0b2a24c56 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -58,6 +58,7 @@
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
  #include <algorithm>
  using namespace llvm;
  
@@ -5463,6 +5464,26 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
      return false;
  
    const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+  if (Size && Size->getZExtValue() == 0) {
+    EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
+    setValue(&I, DAG.getConstant(0, CallVT));
+    return true;
+  }
+
+  const Value *Arg0 = I.getArgOperand(0);
+  const Value *Arg1 = I.getArgOperand(1);
+  const Value *Arg2 = I.getArgOperand(2);
+  const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+  std::pair<SDValue, SDValue> Res =
+    TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+                                getValue(Arg0), getValue(Arg1), getValue(Arg2),
+                                MachinePointerInfo(Arg0),
+                                MachinePointerInfo(Arg1));
+  if (Res.first.getNode()) {
+    setValue(&I, Res.first);
+    DAG.setRoot(Res.second);
+    return true;
+  }
  
    // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
    // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt

index 563513b5f42f36c84de1f8b888321a350a3fab05..eebc4e4572fb0e46d9a5ab7f657d8e01ffba26c6 100644 (file)
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -67,12 +67,12 @@ condition codes.  For example, we could use LCDFR instead of LCDBR.
  --
  
  We don't optimize block memory operations, except using single MVCs
-for memcpy.
+for memcpy and single CLCs for memcmp.
  
-It's definitely worth using things like CLC, NC, XC and OC with
+It's definitely worth using things like NC, XC and OC with
  constant lengths.  MVCIN may be worthwhile too.
  
-We should probably implement things like memcpy using MVC with EXECUTE.
+We should probably implement general memcpy using MVC with EXECUTE.
  Likewise memcmp and CLC.  MVCLE and CLCLE could be useful too.
  
  --
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp

index a51f0168a9e5c91c2e8a18f28d87c51291c850f5..899b08c4599f8b9f688d108c47cb5a84914398f1 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1702,6 +1702,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
      OPCODE(UDIVREM64);
      OPCODE(MVC);
      OPCODE(CLC);
+    OPCODE(IPM);
      OPCODE(ATOMIC_SWAPW);
      OPCODE(ATOMIC_LOADW_ADD);
      OPCODE(ATOMIC_LOADW_SUB);
@@ -2240,8 +2241,9 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
  }
  
  MachineBasicBlock *
-SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
-                                      MachineBasicBlock *MBB) const {
+SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
+                                         MachineBasicBlock *MBB,
+                                         unsigned Opcode) const {
    const SystemZInstrInfo *TII = TM.getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();
  
@@ -2251,7 +2253,7 @@ SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
    uint64_t       SrcDisp  = MI->getOperand(3).getImm();
    uint64_t       Length   = MI->getOperand(4).getImm();
  
-  BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC))
+  BuildMI(*MBB, MI, DL, TII->get(Opcode))
      .addOperand(DestBase).addImm(DestDisp).addImm(Length)
      .addOperand(SrcBase).addImm(SrcDisp);
  
@@ -2483,7 +2485,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
    case SystemZ::ATOMIC_CMP_SWAPW:
      return emitAtomicCmpSwapW(MI, MBB);
    case SystemZ::MVCWrapper:
-    return emitMVCWrapper(MI, MBB);
+    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
+  case SystemZ::CLCWrapper:
+    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
    default:
      llvm_unreachable("Unexpected instr type to insert");
    }
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h

index 4098ff34b38125dac199e88a282d478d9e7ca352..0036ce84aa7ba938ffe6056e2bff58e21f1cca6e 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -84,6 +84,9 @@ namespace SystemZISD {
      // as for MVC.
      CLC,
  
+    // Store the CC value in bits 29 and 28 of an integer.
+    IPM,
+
      // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
      // ATOMIC_LOAD_<op>.
      //
@@ -234,8 +237,9 @@ private:
                                            unsigned BitSize) const;
    MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;
-  MachineBasicBlock *emitMVCWrapper(MachineInstr *MI,
-                                    MachineBasicBlock *BB) const;
+  MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
+                                       MachineBasicBlock *BB,
+                                       unsigned Opcode) const;
  };
  } // end namespace llvm
  
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp

index 9ee60aa80c3f82881a1580fe03e909a36ecefdfb..54a8669359907e140b487c60534d00899c936974 100644 (file)
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -293,6 +293,99 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
    return Count;
  }
  
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
+                                      unsigned &SrcReg, unsigned &SrcReg2,
+                                      int &Mask, int &Value) const {
+  assert(MI->isCompare() && "Caller should have checked for a comparison");
+
+  if (MI->getNumExplicitOperands() == 2 &&
+      MI->getOperand(0).isReg() &&
+      MI->getOperand(1).isImm()) {  // only register-vs-immediate compares
+    SrcReg = MI->getOperand(0).getReg();
+    SrcReg2 = 0;  // no second register operand
+    Value = MI->getOperand(1).getImm();
+    Mask = ~0;  // all mask bits set
+    return true;
+  }
+
+  return false;  // any other operand shape is left unanalyzed
+}
+
+// If Reg is a virtual register with exactly one non-debug use, return its
+// unique defining instruction (null if there is none), otherwise return null.
+static MachineInstr *getDefSingleUse(const MachineRegisterInfo *MRI,
+                                     unsigned Reg) {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return 0;
+
+  MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
+  MachineRegisterInfo::use_nodbg_iterator E = MRI->use_nodbg_end();
+  if (I == E || llvm::next(I) != E)  // no uses at all, or more than one
+    return 0;
+
+  return MRI->getUniqueVRegDef(Reg);
+}
+
+// Return true if MI is a shift of type Opcode by exactly Imm bits.
+static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) {
+  return (MI->getOpcode() == Opcode &&
+          !MI->getOperand(2).getReg() &&  // operand 2 must be "no register"
+          MI->getOperand(3).getImm() == Imm);  // operand 3 is the amount
+}
+
+// Compare compares SrcReg against zero.  Check whether SrcReg contains
+// the result of an IPM/SLL/SRA sequence that is only used by Compare.  If
+// so, delete the sequence and Compare; return true if a change was made.
+static bool removeIPM(MachineInstr *Compare, unsigned SrcReg,
+                      const MachineRegisterInfo *MRI,
+                      const TargetRegisterInfo *TRI) {
+  MachineInstr *SRA = getDefSingleUse(MRI, SrcReg);
+  if (!SRA || !isShift(SRA, SystemZ::SRA, 30))
+    return false;
+
+  MachineInstr *SLL = getDefSingleUse(MRI, SRA->getOperand(1).getReg());
+  if (!SLL || !isShift(SLL, SystemZ::SLL, 2))
+    return false;
+
+  MachineInstr *IPM = getDefSingleUse(MRI, SLL->getOperand(1).getReg());
+  if (!IPM || IPM->getOpcode() != SystemZ::IPM)
+    return false;
+
+  // Check that there are no assignments to CC between the IPM and Compare,
+  // except for the SRA that we'd like to delete.  We can ignore SLL because
+  // it does not assign to CC.  We can also ignore uses of the SRA CC result,
+  // since it is effectively restoring CC to the value it had before IPM
+  // (for all current use cases).
+  if (IPM->getParent() != Compare->getParent())
+    return false;
+  MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare;
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    MachineInstr *MI = MBBI;
+    if (MI != SRA && MI->modifiesRegister(SystemZ::CC, TRI))
+      return false;
+  }
+
+  IPM->eraseFromParent();
+  SLL->eraseFromParent();
+  SRA->eraseFromParent();
+  Compare->eraseFromParent();
+  return true;
+}
+
+bool
+SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
+                                       unsigned SrcReg, unsigned SrcReg2,
+                                       int Mask, int Value,
+                                       const MachineRegisterInfo *MRI) const {
+  assert(!SrcReg2 && "Only optimizing constant comparisons so far");
+  bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
+  if (Value == 0 &&  // removeIPM requires a comparison against zero
+      !IsLogical &&  // and only compares without the IsLogical flag qualify
+      removeIPM(Compare, SrcReg, MRI, TM.getRegisterInfo()))
+    return true;
+  return false;  // nothing was removed; Compare is still in place
+}
+
  // If Opcode is a move that has a conditional variant, return that variant,
  // otherwise return 0.
  static unsigned getConditionalMove(unsigned Opcode) {
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h

index 276fd3b7a1bb9c934b3d44a1c5f668e0e71adb46..3c4e8af0e61411b56722d4871d7eb1072bbf43dc 100644 (file)
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -129,6 +129,12 @@ public:
                                  MachineBasicBlock *FBB,
                                  const SmallVectorImpl<MachineOperand> &Cond,
                                  DebugLoc DL) const LLVM_OVERRIDE;
+  bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+                      unsigned &SrcReg2, int &Mask, int &Value) const
+    LLVM_OVERRIDE;
+  bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+                            unsigned SrcReg2, int Mask, int Value,
+                            const MachineRegisterInfo *MRI) const LLVM_OVERRIDE;
    virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
    virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
                                     unsigned ExtraPredCycles,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td

index a7181d68a7f01f621caa0694eb3ab8af6f3c9351..834ffedcf3f076cd55cc17453bb04498f925fa74 100644 (file)
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1117,7 +1117,7 @@ let Defs = [CC] in {
  
  // Extract CC into bits 29 and 28 of a register.
  let Uses = [CC] in
-  def IPM : InherentRRE<"ipm", 0xB222, GR32, (null_frag)>;
+  def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>;
  
  // Read a 32-bit access register into a GR32.  As with all GR32 operations,
  // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td

index dae04de02b726582aae7f110e2c8b0a4667f3d74..8a5b909eb6d46a63cbd0fd362cc319d4a5948372 100644 (file)
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -58,6 +58,7 @@ def SDT_ZMemMemLength       : SDTypeProfile<0, 3,
                                              [SDTCisPtrTy<0>,
                                               SDTCisPtrTy<1>,
                                               SDTCisVT<2, i32>]>;
+def SDT_ZI32Intrinsic       : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
  
  //===----------------------------------------------------------------------===//
  // Node definitions
@@ -112,7 +113,9 @@ def z_atomic_cmp_swapw  : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
  def z_mvc               : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
                                   [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
  def z_clc               : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
-                                 [SDNPHasChain, SDNPMayLoad]>;
+                                 [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_ipm               : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
+                                 [SDNPInGlue]>;
  
  //===----------------------------------------------------------------------===//
  // Pattern fragments
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp

index 4ca9292092d066f3f423d8a6699b4aa52ff7e212..341dc9465502a0e2c14bc556a9b6c634235724f3 100644 (file)
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -125,3 +125,30 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
    }
    return SDValue();
  }
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                        SDValue Src1, SDValue Src2, SDValue Size,
+                        MachinePointerInfo Op1PtrInfo,
+                        MachinePointerInfo Op2PtrInfo) const {
+  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+    uint64_t Bytes = CSize->getZExtValue();
+    if (Bytes >= 1 && Bytes <= 0x100) {
+      // A constant size of 1 to 256 bytes is handled with a single CLC.
+      SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+      Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain,
+                          Src1, Src2, Size);
+      SDValue Glue = Chain.getValue(1);  // second CLC result: the glue
+      // IPM puts CC in bits 29 and 28; SHL 2 + SRA 30 maps CC 0/1/2 to 0/1/-2.
+      // NOTE(review): the z/Architecture manual gives CLC CC 1 as "first
+      // operand low" and CC 2 as "high", so the sign looks inverted -- confirm.
+      SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM,
+                                DAG.getConstant(2, MVT::i32));
+      SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL,
+                                DAG.getConstant(30, MVT::i32));
+      return std::make_pair(SRA, Chain);
+    }
+  }
+  return std::make_pair(SDValue(), SDValue());  // null pair: use the libcall
+}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h

index 9138a9cc082412c245bea4cc7d9932f289ff88a1..c757e167071dda5dfc87dfb528fd0eee4dfc4d6a 100644 (file)
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -38,7 +38,13 @@ public:
    EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
                            SDValue Chain, SDValue Dst, SDValue Byte,
                            SDValue Size, unsigned Align, bool IsVolatile,
-                          MachinePointerInfo DstPtrInfo) const;
+                          MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE;
+
+  virtual std::pair<SDValue, SDValue>
+  EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                          SDValue Src1, SDValue Src2, SDValue Size,
+                          MachinePointerInfo Op1PtrInfo,
+                          MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
  };
  
  }
diff --git a/test/CodeGen/SystemZ/memcmp-01.ll b/test/CodeGen/SystemZ/memcmp-01.ll

new file mode 100644 (file)

index 0000000..3747769
--- /dev/null
+++ b/test/CodeGen/SystemZ/memcmp-01.ll
@@ -0,0 +1,134 @@
+; Test memcmp using CLC.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
+
+; Zero-length comparisons should be optimized away.
+define i32 @f1(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f1:
+; CHECK: lhi %r2, 0
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
+  ret i32 %res
+}
+
+; Check a case where the result is used as an integer.
+define i32 @f2(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f2:
+; CHECK: clc 0(2,%r2), 0(%r3)
+; CHECK: ipm %r2
+; CHECK: sll %r2, 2
+; CHECK: sra %r2, 30
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
+  ret i32 %res
+}
+
+; Check a case where the result is tested for equality.
+define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: clc 0(3,%r2), 0(%r3)
+; CHECK-NEXT: je {{\..*}}
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested for inequality.
+define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: clc 0(4,%r2), 0(%r3)
+; CHECK-NEXT: jlh {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested via slt.
+define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: clc 0(5,%r2), 0(%r3)
+; CHECK-NEXT: jl {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested for sgt.
+define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: clc 0(6,%r2), 0(%r3)
+; CHECK-NEXT: jh {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
+  %cmp = icmp sgt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the upper end of the CLC range.  Here the result is used both as
+; an integer and for branching, but it's better to branch on the result
+; of the SRA.
+define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: ipm %r2
+; CHECK: sll %r2, 2
+; CHECK: sra %r2, 30
+; CHECK: jl {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; 257 bytes is too big for a single CLC.  For now expect a call instead.
+define i32 @f8(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, memcmp@PLT
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
+  ret i32 %res
+}
author	Richard Sandiford <rsandifo@linux.vnet.ibm.com>
	Mon, 12 Aug 2013 10:28:10 +0000 (10:28 +0000)
committer	Richard Sandiford <rsandifo@linux.vnet.ibm.com>
	Mon, 12 Aug 2013 10:28:10 +0000 (10:28 +0000)
include/llvm/Target/TargetSelectionDAGInfo.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/Target/SystemZ/README.txt		patch \| blob \| history
lib/Target/SystemZ/SystemZISelLowering.cpp		patch \| blob \| history
lib/Target/SystemZ/SystemZISelLowering.h		patch \| blob \| history
lib/Target/SystemZ/SystemZInstrInfo.cpp		patch \| blob \| history
lib/Target/SystemZ/SystemZInstrInfo.h		patch \| blob \| history
lib/Target/SystemZ/SystemZInstrInfo.td		patch \| blob \| history
lib/Target/SystemZ/SystemZOperators.td		patch \| blob \| history
lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp		patch \| blob \| history
lib/Target/SystemZ/SystemZSelectionDAGInfo.h		patch \| blob \| history
test/CodeGen/SystemZ/memcmp-01.ll	[new file with mode: 0644]	patch \| blob