SystemZISelDAGToDAG.cpp
SystemZISelLowering.cpp
SystemZInstrInfo.cpp
+ SystemZLongBranch.cpp
SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp
SystemZSubtarget.cpp
llvm_unreachable("Unknown fixup kind!");
}
-// If Opcode can be relaxed, return the relaxed form, otherwise return 0.
+// If Opcode is a relaxable interprocedural reference, return the relaxed form,
+// otherwise return 0.
static unsigned getRelaxedOpcode(unsigned Opcode) {
switch (Opcode) {
- case SystemZ::BRC: return SystemZ::BRCL;
- case SystemZ::J: return SystemZ::JG;
case SystemZ::BRAS: return SystemZ::BRASL;
}
return 0;
--
-We don't use the combined COMPARE AND BRANCH instructions. Using them
-would require a change to the way we handle out-of-range branches.
-At the moment, we start with 32-bit forms like BRCL and shorten them
-to forms like BRC where possible, but COMPARE AND BRANCH does not have
-a 32-bit form.
+We don't use the combined COMPARE AND BRANCH instructions.
--
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
} // end namespace llvm;
#endif
// jCC JoinMBB
// # fallthrough to FalseMBB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
MBB->addSuccessor(JoinMBB);
MBB->addSuccessor(FalseMBB);
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
// %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
// %RotatedOldVal = RLL %OldVal, 0(%BitShift)
// CompareOpcode %RotatedOldVal, %Src2
- // BRCL KeepOldMask, UpdateMBB
+ // BRC KeepOldMask, UpdateMBB
MBB = LoopMBB;
BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
.addReg(OrigVal).addMBB(StartMBB)
.addReg(OldVal).addReg(BitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL))
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(KeepOldMask).addMBB(UpdateMBB);
MBB->addSuccessor(UpdateMBB);
MBB->addSuccessor(UseAltMBB);
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
.addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
BuildMI(MBB, DL, TII->get(SystemZ::CR))
.addReg(Dest).addReg(RetryCmpVal);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(DoneMBB);
MBB->addSuccessor(DoneMBB);
MBB->addSuccessor(SetMBB);
.addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
.addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
- BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
#include "SystemZInstrInfo.h"
#include "SystemZInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
#define GET_INSTRINFO_CTOR
#define GET_INSTRMAP_INFO
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB);
return 1;
}
// Conditional branch.
unsigned Count = 0;
unsigned CC = Cond[0].getImm();
- BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB);
+ BuildMI(&MBB, DL, get(SystemZ::BRC)).addImm(CC).addMBB(TBB);
++Count;
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB);
++Count;
}
return Count;
return false;
}
+// Return the size of MI in bytes. Inline asm is sized by passing its asm
+// string (operand 0) to getInlineAsmLength; every other instruction uses
+// the size recorded in its instruction description.
+uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const {
+ if (MI->getOpcode() == TargetOpcode::INLINEASM) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ return MI->getDesc().getSize();
+}
+
bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond,
const MachineOperand *&Target) const {
switch (MI->getOpcode()) {
// Return the SystemZRegisterInfo, which this class owns.
const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+ // Return the size in bytes of MI.
+ uint64_t getInstSizeInBytes(const MachineInstr *MI) const;
+
// Return true if MI is a conditional or unconditional branch.
// When returning true, set Cond to the mask of condition-code
// values on which the instruction will branch, and set Target
def BR : InstRR<0x07, (outs), (ins ADDR64:$R2),
"br\t$R2", [(brind ADDR64:$R2)]>;
- // An assembler extended mnemonic for BRC. Use a separate instruction for
- // the asm parser, so that we don't relax Js to external symbols into JGs.
- let isCodeGenOnly = 1 in
- def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>;
- def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2", []>;
+ // An assembler extended mnemonic for BRC.
+ def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2",
+ [(br bb:$I2)]>;
// An assembler extended mnemonic for BRCL. (The extension is "G"
// rather than "L" because "JL" is "Jump if Less".)
- def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
- "jg\t$I2", [(br bb:$I2)]>;
+ def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>;
}
// Conditional branches. It's easier for LLVM to handle these branches
defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">;
defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">;
-def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>;
+def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRC cond4:$cond, bb:$dst)>;
// Define AsmParser mnemonics for each condition code.
multiclass CondExtendedMnemonic<bits<4> Cond, string name> {
--- /dev/null
+//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that all branches are in range. There are several ways
+// in which this could be done. One aggressive approach is to assume that all
+// branches are in range and successively replace those that turn out not
+// to be in range with a longer form (branch relaxation). A simple
+// implementation is to continually walk through the function relaxing
+// branches until no more changes are needed and a fixed point is reached.
+// However, in the pathological worst case, this implementation is
+// quadratic in the number of blocks; relaxing branch N can make branch N-1
+// go out of range, which in turn can make branch N-2 go out of range,
+// and so on.
+//
+// An alternative approach is to assume that all branches must be
+// converted to their long forms, then reinstate the short forms of
+// branches that, even under this pessimistic assumption, turn out to be
+// in range (branch shortening). This too can be implemented as a function
+// walk that is repeated until a fixed point is reached. In general,
+// the result of shortening is not as good as that of relaxation, and
+// shortening is also quadratic in the worst case; shortening branch N
+// can bring branch N-1 in range of the short form, which in turn can do
+// the same for branch N-2, and so on. The main advantage of shortening
+// is that each walk through the function produces valid code, so it is
+// possible to stop at any point after the first walk. The quadraticness
+// could therefore be handled with a maximum pass count, although the
+// question then becomes: what maximum count should be used?
+//
+// On SystemZ, long branches are only needed for functions bigger than 64k,
+// which are relatively rare to begin with, and the long branch sequences
+// are actually relatively cheap. It therefore doesn't seem worth spending
+// much compilation time on the problem. Instead, the approach we take is:
+//
+// (1) Check whether all branches can be short (the usual case). Exit the
+// pass if so.
+// (2) If one branch needs to be long, work out the address that each block
+// would have if all branches need to be long, as for shortening above.
+// (3) Relax any branch that is out of range according to this pessimistic
+// assumption.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-long-branch"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+
+ // Represents positional information about a basic block. The pass keeps
+ // one entry per block in SystemZLongBranch::MBBs, indexed by block number.
+ struct MBBInfo {
+ // The address that we currently assume the block has, relative to
+ // the start of the function. This is designed so that taking the
+ // difference between two addresses gives a conservative upper bound
+ // on the distance between them. Assigned by skipNonTerminators.
+ uint64_t Address;
+
+ // The size of the block in bytes, excluding terminators.
+ // Accumulated by initMBBInfo; this value never changes afterwards.
+ uint64_t Size;
+
+ // The minimum alignment of the block, as a log2 value.
+ // Copied from the block by initMBBInfo; this value never changes
+ // afterwards.
+ unsigned Alignment;
+
+ // The number of terminators in this block, counted by initMBBInfo
+ // (debug values are not counted). This value never changes afterwards.
+ unsigned NumTerminators;
+
+ MBBInfo()
+ : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
+ };
+
+ // Represents the state of a block terminator. Entries are stored in
+ // SystemZLongBranch::Terminators in the order in which the terminators
+ // are encountered while walking the blocks.
+ struct TerminatorInfo {
+ // If this terminator is a relaxable branch, this points to the branch
+ // instruction, otherwise it is null.
+ MachineInstr *Branch;
+
+ // The current address of the terminator, in the same form as
+ // for MBBInfo. Assigned by skipTerminator.
+ uint64_t Address;
+
+ // The current size of the terminator in bytes.
+ uint64_t Size;
+
+ // If Branch is nonnull, this is the number of the target block,
+ // otherwise it is unused.
+ unsigned TargetBlock;
+
+ // If Branch is nonnull, this is the number of extra bytes that the
+ // longest relaxed form occupies compared with the current form,
+ // otherwise it is zero.
+ unsigned ExtraRelaxSize;
+
+ // Initialize every field, including Address, so that a freshly
+ // constructed object never carries an indeterminate value.
+ TerminatorInfo()
+ : Branch(0), Address(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {}
+ };
+
+ // Used to keep track of the current position while iterating over the blocks.
+ struct BlockPosition {
+ // The offset from the start of the function, in the same form
+ // as MBBInfo::Address.
+ uint64_t Address;
+
+ // The number of low bits in Address that are known to be the same
+ // as the runtime address.
+ unsigned KnownBits;
+
+ // Start at offset 0 with InitialAlignment known low bits; callers pass
+ // the function's alignment.
+ BlockPosition(unsigned InitialAlignment)
+ : Address(0), KnownBits(InitialAlignment) {}
+ };
+
+ // Machine function pass that replaces out-of-range J and BRC branches
+ // with their long forms JG and BRCL.
+ class SystemZLongBranch : public MachineFunctionPass {
+ public:
+ static char ID;
+ SystemZLongBranch(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID),
+ TII(static_cast<const SystemZInstrInfo *>(tm.getInstrInfo())) {}
+
+ virtual const char *getPassName() const {
+ return "SystemZ Long Branch";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
+ void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
+ bool AssumeRelaxed);
+ TerminatorInfo describeTerminator(MachineInstr *MI);
+ uint64_t initMBBInfo();
+ bool mustRelaxBranch(const TerminatorInfo &Terminator);
+ bool mustRelaxABranch();
+ void setWorstCaseAddresses();
+ void relaxBranch(TerminatorInfo &Terminator);
+ void relaxBranches();
+
+ const SystemZInstrInfo *TII; // Used for instruction sizes and descriptors.
+ MachineFunction *MF; // Function being processed; set by runOnMachineFunction.
+ SmallVector<MBBInfo, 16> MBBs; // One entry per block, indexed by block number.
+ SmallVector<TerminatorInfo, 16> Terminators; // All terminators, in block order.
+ };
+
+ char SystemZLongBranch::ID = 0;
+
+ // Largest distances in bytes, backward and forward, over which
+ // mustRelaxBranch still accepts a short branch.
+ // NOTE(review): the values equal 0x8000 * 2 and 0x7fff * 2, i.e. they look
+ // like the reach of a signed 16-bit count of 2-byte units -- confirm
+ // against the encoding of the short branch instructions.
+ const uint64_t MaxBackwardRange = 0x10000;
+ const uint64_t MaxForwardRange = 0xfffe;
+} // end of anonymous namespace
+
+// Create a new instance of the long-branch pass for target machine TM.
+// The returned object is allocated with new.
+FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
+ return new SystemZLongBranch(TM);
+}
+
+// Position describes the state immediately before Block. Update Block
+// accordingly and move Position to the end of the block's non-terminator
+// instructions.
+void SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
+ MBBInfo &Block) {
+ if (Block.Alignment > Position.KnownBits) {
+ // When calculating the address of Block, we need to conservatively
+ // assume that Block had the worst possible misalignment.
+ Position.Address += ((uint64_t(1) << Block.Alignment) -
+ (uint64_t(1) << Position.KnownBits));
+ // From here on the low Block.Alignment bits of the address are known.
+ Position.KnownBits = Block.Alignment;
+ }
+
+ // Align the addresses: round Position.Address up to the next multiple
+ // of 2^Block.Alignment.
+ uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1;
+ Position.Address = (Position.Address + AlignMask) & ~AlignMask;
+
+ // Record the block's position.
+ Block.Address = Position.Address;
+
+ // Move past the non-terminators in the block.
+ Position.Address += Block.Size;
+}
+
+// Position describes the state immediately before Terminator.
+// Update Terminator accordingly and move Position past it.
+// Assume that Terminator will be relaxed if AssumeRelaxed.
+void SystemZLongBranch::skipTerminator(BlockPosition &Position,
+ TerminatorInfo &Terminator,
+ bool AssumeRelaxed) {
+ Terminator.Address = Position.Address;
+ Position.Address += Terminator.Size;
+ // ExtraRelaxSize is zero for terminators that cannot be relaxed (or have
+ // already been relaxed), so this only grows relaxable branches.
+ if (AssumeRelaxed)
+ Position.Address += Terminator.ExtraRelaxSize;
+}
+
+// Return a description of terminator instruction MI. The Size field is
+// always filled in. Branch, TargetBlock and ExtraRelaxSize are only set
+// for conditional and unconditional branches; other terminators keep the
+// constructor defaults (null Branch, zero ExtraRelaxSize). The Address
+// field is not set here; skipTerminator assigns it.
+TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
+ TerminatorInfo Terminator;
+ Terminator.Size = TII->getInstSizeInBytes(MI);
+ if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) {
+ Terminator.Branch = MI;
+ switch (MI->getOpcode()) {
+ case SystemZ::J:
+ // Relaxes to JG, which is 2 bytes longer.
+ Terminator.TargetBlock = MI->getOperand(0).getMBB()->getNumber();
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ case SystemZ::BRC:
+ // Relaxes to BRCL, which is 2 bytes longer. Operand 0 is the
+ // condition code mask.
+ Terminator.TargetBlock = MI->getOperand(1).getMBB()->getNumber();
+ Terminator.ExtraRelaxSize = 2;
+ break;
+ default:
+ // Any other branch opcode reaching this pass is treated as a bug.
+ llvm_unreachable("Unrecognized branch instruction");
+ }
+ }
+ return Terminator;
+}
+
+// Fill MBBs and Terminators, setting the addresses on the assumption
+// that no branches need relaxation. Return the size of the function under
+// this assumption.
+uint64_t SystemZLongBranch::initMBBInfo() {
+ // Renumber first: block numbers are used below as indices into MBBs.
+ MF->RenumberBlocks();
+ unsigned NumBlocks = MF->size();
+
+ MBBs.clear();
+ MBBs.resize(NumBlocks);
+
+ // Only a capacity hint; a block may contribute any number of terminators.
+ Terminators.clear();
+ Terminators.reserve(NumBlocks);
+
+ BlockPosition Position(MF->getAlignment());
+ for (unsigned I = 0; I < NumBlocks; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I);
+ MBBInfo &Block = MBBs[I];
+
+ // Record the alignment, for quick access.
+ Block.Alignment = MBB->getAlignment();
+
+ // Calculate the size of the fixed part of the block.
+ MachineBasicBlock::iterator MI = MBB->begin();
+ MachineBasicBlock::iterator End = MBB->end();
+ while (MI != End && !MI->isTerminator()) {
+ Block.Size += TII->getInstSizeInBytes(MI);
+ ++MI;
+ }
+ skipNonTerminators(Position, Block);
+
+ // Add the terminators. Debug values found among them are skipped:
+ // they are neither recorded nor counted.
+ while (MI != End) {
+ if (!MI->isDebugValue()) {
+ assert(MI->isTerminator() && "Terminator followed by non-terminator");
+ Terminators.push_back(describeTerminator(MI));
+ // false: lay the terminator out at its current (unrelaxed) size.
+ skipTerminator(Position, Terminators.back(), false);
+ ++Block.NumTerminators;
+ }
+ ++MI;
+ }
+ }
+
+ // Position now sits just past the last block, i.e. at the function size.
+ return Position.Address;
+}
+
+// Decide whether Terminator has to be relaxed, given the addresses that
+// are currently recorded for it and for its target block.
+bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator) {
+ // Terminators that are not relaxable branches never need relaxing.
+ if (!Terminator.Branch)
+ return false;
+
+ const uint64_t From = Terminator.Address;
+ const uint64_t To = MBBs[Terminator.TargetBlock].Address;
+
+ // The target lies strictly before the terminator: backward branch.
+ if (To < From)
+ return From - To > MaxBackwardRange;
+
+ // Otherwise the target is at or after the terminator: forward branch.
+ return To - From > MaxForwardRange;
+}
+
+// Report whether at least one recorded terminator is out of range under
+// the current address assumptions.
+bool SystemZLongBranch::mustRelaxABranch() {
+ for (unsigned I = 0, E = Terminators.size(); I != E; ++I) {
+ if (mustRelaxBranch(Terminators[I]))
+ return true;
+ }
+ return false;
+}
+
+// Recompute the address of every block and terminator, pessimistically
+// assuming that every relaxable branch takes its long form.
+void SystemZLongBranch::setWorstCaseAddresses() {
+ // Index of the next entry of Terminators to lay out; the entries are
+ // stored in block order, so a single running index is enough.
+ unsigned NextTerminator = 0;
+ BlockPosition Position(MF->getAlignment());
+ for (unsigned I = 0, E = MBBs.size(); I != E; ++I) {
+ MBBInfo &Block = MBBs[I];
+ skipNonTerminators(Position, Block);
+ for (unsigned Left = Block.NumTerminators; Left != 0; --Left) {
+ skipTerminator(Position, Terminators[NextTerminator], true);
+ ++NextTerminator;
+ }
+ }
+}
+
+// Relax the branch described by Terminator. The instruction is changed in
+// place by switching its descriptor to the long opcode (J -> JG,
+// BRC -> BRCL). Terminator must describe a relaxable branch, i.e. its
+// Branch field must be nonnull.
+void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
+ MachineInstr *Branch = Terminator.Branch;
+ switch (Branch->getOpcode()) {
+ case SystemZ::J:
+ Branch->setDesc(TII->get(SystemZ::JG));
+ break;
+ case SystemZ::BRC:
+ Branch->setDesc(TII->get(SystemZ::BRCL));
+ break;
+ default:
+ llvm_unreachable("Unrecognized branch");
+ }
+
+ // Fold the extra bytes into the recorded size and mark the terminator as
+ // no longer relaxable, so later queries leave it alone.
+ Terminator.Size += Terminator.ExtraRelaxSize;
+ Terminator.ExtraRelaxSize = 0;
+ Terminator.Branch = 0;
+
+ // Statistic: count one more long branch.
+ ++LongBranches;
+}
+
+// Walk all recorded terminators and relax each one that is out of range
+// under the current address assumptions.
+void SystemZLongBranch::relaxBranches() {
+ for (unsigned I = 0, E = Terminators.size(); I != E; ++I) {
+ TerminatorInfo &Terminator = Terminators[I];
+ if (mustRelaxBranch(Terminator))
+ relaxBranch(Terminator);
+ }
+}
+
+// Pass entry point. Returns false when F is left untouched and true after
+// the relaxation step has been run.
+bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
+ MF = &F;
+
+ // A function no larger than MaxForwardRange is left alone without
+ // looking at individual branches.
+ const uint64_t FunctionSize = initMBBInfo();
+ if (FunctionSize <= MaxForwardRange)
+ return false;
+
+ // Likewise if every branch is in range with all branches kept short.
+ if (!mustRelaxABranch())
+ return false;
+
+ // At least one branch is out of range: switch to the pessimistic layout
+ // and relax whatever is out of range under it.
+ setWorstCaseAddresses();
+ relaxBranches();
+ return true;
+}
using namespace llvm;
-// Where relaxable pairs of reloc-generating instructions exist,
-// we tend to use the longest form by default, since that produces
-// correct assembly in cases where no relaxation is performed.
-// If Opcode is one such instruction, return the opcode for the
-// shortest possible form instead, otherwise return Opcode itself.
+// If Opcode is an interprocedural reference that can be shortened,
+// return the short form, otherwise return Opcode itself.
static unsigned getShortenedInstr(unsigned Opcode) {
switch (Opcode) {
- case SystemZ::BRCL: return SystemZ::BRC;
- case SystemZ::JG: return SystemZ::J;
case SystemZ::BRASL: return SystemZ::BRAS;
}
return Opcode;
return getTM<SystemZTargetMachine>();
}
- virtual bool addInstSelector();
+ virtual bool addInstSelector() LLVM_OVERRIDE;
+ virtual bool addPreEmitPass() LLVM_OVERRIDE;
};
} // end anonymous namespace
return false;
}
+// Schedule the SystemZ long-branch pass as a pre-emit pass. Always
+// returns true.
+bool SystemZPassConfig::addPreEmitPass() {
+ addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
+ return true;
+}
+
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM);
}
--- /dev/null
+# Test normal conditional branches in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+# conditional branch to after0
+# ...
+# beforeN:
+# conditional branch to after0
+# main:
+# 0xffd8 bytes, from MVIY instructions
+# conditional branch to main
+# after0:
+# ...
+# conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 8 bytes if it uses a short branch
+# and 10 if it uses a long one. The ones before "main:" have to take the branch
+# length into account -- which is 4 bytes for short branches -- so the final
+# (0x28 - 4) / 8 == 4 blocks can use short branches. The ones after "main:"
+# do not, so the first 0x28 / 8 == 5 can use short branches. However,
+# the conservative algorithm we use makes one branch unnecessarily long
+# on each side.
+#
+# CHECK: c %r4, 0(%r3)
+# CHECK: jge [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 4(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 8(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 12(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 16(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 20(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 24(%r3)
+# CHECK: j{{g?}}e [[LABEL]]
+# CHECK: c %r4, 28(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 32(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 36(%r3)
+# CHECK: je [[LABEL]]
+# ...main goes here...
+# CHECK: c %r4, 100(%r3)
+# CHECK: je [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 104(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 108(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 112(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 116(%r3)
+# CHECK: j{{g?}}e [[LABEL]]
+# CHECK: c %r4, 120(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 124(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 128(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 132(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 136(%r3)
+# CHECK: jge [[LABEL]]
+
+branch_blocks = 10
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i32 *%stop, i32 %limit) {'
+print 'entry:'
+print ' br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+ next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+ print 'before%d:' % i
+ print ' %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
+ print ' %%bcur%d = load volatile i32 *%%bstop%d' % (i, i)
+ print ' %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i)
+ print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+ print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+ a, b = b, a + b
+ offset = 4096 + b % 500000
+ value = a % 256
+ print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+ print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25)
+ print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i)
+ print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
+ print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+ print ''
+ print 'after%d:' % i
+
+print ' ret void'
+print '}'
--- /dev/null
+# Test normal conditional branches in cases where block alignments cause
+# some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu -align-all-blocks=8 | FileCheck %s
+
+# Construct:
+#
+# b0:
+# conditional branch to end
+# ...
+# b<N>:
+# conditional branch to end
+# b<N+1>:
+# conditional branch to b0
+# ...
+# b<2*N>:
+# conditional branch to b0
+# end:
+#
+# with N == 256 + 4. The -align-all-blocks=8 option ensures that all blocks
+# are 256 bytes in size. The first 4 blocks and the last 4 blocks are then
+# out of range.
+#
+# CHECK: c %r4, 0(%r3)
+# CHECK: jge [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 4(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 8(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 12(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 16(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 20(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 24(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 28(%r3)
+# CHECK: je [[LABEL]]
+# ...lots of other blocks...
+# CHECK: c %r4, 1004(%r3)
+# CHECK: je [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 1008(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1012(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1016(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1020(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1024(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 1028(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 1032(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 1036(%r3)
+# CHECK: jge [[LABEL]]
+
+blocks = 256 + 4
+
+print 'define void @f1(i8 *%base, i32 *%stop, i32 %limit) {'
+print 'entry:'
+print ' br label %b0'
+print ''
+
+a, b = 1, 1
+for i in xrange(blocks):
+ a, b = b, a + b
+ value = a % 256
+ next = 'b%d' % (i + 1) if i + 1 < blocks else 'end'
+ other = 'end' if 2 * i < blocks else 'b0'
+ print 'b%d:' % i
+ print ' store volatile i8 %d, i8 *%%base' % value
+ print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
+ print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i)
+ print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
+ print ' br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next)
+
+print ''
+print '%s:' % next
+print ' ret void'
+print '}'
--- /dev/null
+config.suffixes = ['.py']
+
+# These tests take on the order of seconds to run, so skip them unless
+# running natively.
+if config.root.host_arch not in ['SystemZ']:
+ config.unsupported = True
+
+targets = set(config.root.targets_to_build.split())
+if not 'SystemZ' in targets:
+ config.unsupported = True
; CHECK: f1:
; CHECK: ahi %r4, -1
; CHECK: clfi %r4, 5
-; CHECK-NEXT: j{{g?}}g
+; CHECK-NEXT: j{{g?}}h
; CHECK: llgfr [[OP64:%r[0-5]]], %r4
; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
; CHECK: larl [[BASE:%r[1-5]]]
define double @f15(double %a, double %b, i16 *%ptr) {
; CHECK: f15:
; CHECK: chhsi 0(%r2), -32767
-; CHECK-NEXT: j{{g?}}g
+; CHECK-NEXT: j{{g?}}h
; CHECK: br %r14
%val = load i16 *%ptr
%ext = sext i16 %val to i32
define double @f15(double %a, double %b, i16 *%ptr) {
; CHECK: f15:
; CHECK: chhsi 0(%r2), -32767
-; CHECK-NEXT: j{{g?}}g
+; CHECK-NEXT: j{{g?}}h
; CHECK: br %r14
%val = load i16 *%ptr
%ext = sext i16 %val to i64