return NULL;
}
+ /// This callback is used to prepare for a volatile or atomic load.
+ /// It takes a chain node as input and returns the chain for the load itself.
+ ///
+ /// Having a callback like this is necessary for targets like SystemZ,
+ /// which allows a CPU to reuse the result of a previous load indefinitely,
+ /// even if a cache-coherent store is performed by another CPU. The default
+ /// implementation does nothing.
+ virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
+ SelectionDAG &DAG) const {
+ return Chain;
+ }
+
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile() || NumValues > MaxParallelChains)
+ if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(
Root = DAG.getRoot();
}
+ const TargetLowering *TLI = TM.getTargetLowering();
+ if (isVolatile)
+ Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
NumValues));
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
+ InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
getValue(I.getPointerOperand()),
#undef LOWER_HIGH
+ case SystemZ::Serialize:
+ if (Subtarget->hasFastSerialization())
+ LoweredMI = MCInstBuilder(SystemZ::AsmBCR)
+ .addImm(14).addReg(SystemZ::R0D);
+ else
+ LoweredMI = MCInstBuilder(SystemZ::AsmBCR)
+ .addImm(15).addReg(SystemZ::R0D);
+ break;
+
default:
Lower.lower(MI, LoweredMI);
break;
RetOps.data(), RetOps.size());
}
+SDValue SystemZTargetLowering::
+prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
+ return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
OPCODE(STPCPY);
OPCODE(SEARCH_STRING);
OPCODE(IPM);
+ OPCODE(SERIALIZE);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
// Store the CC value in bits 29 and 28 of an integer.
IPM,
+ // Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
+ SERIALIZE,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE;
+ virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
+ SelectionDAG &DAG) const
+ LLVM_OVERRIDE;
private:
const SystemZSubtarget &Subtarget;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
// Atomic operations
//===----------------------------------------------------------------------===//
+def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
+
def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>;
def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>;
def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
+def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
+ [SDNPHasChain, SDNPMayStore]>;
+
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
: SDNode<"SystemZISD::"##name, profile,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
"Assume that the floating-point extension facility is installed"
>;
+def FeatureFastSerialization : SystemZFeature<
+ "fast-serialization", "FastSerialization",
+ "Assume that the fast-serialization facility is installed"
+>;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension]>;
+ FeatureFPExtension, FeatureFastSerialization]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension]>;
+ FeatureFPExtension, FeatureFastSerialization]>;
const std::string &FS)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- TargetTriple(TT) {
+ HasFastSerialization(false), TargetTriple(TT) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasFastSerialization;
private:
Triple TargetTriple;
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+ // Return true if the target has the fast-serialization facility.
+ bool hasFastSerialization() const { return HasFastSerialization; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
print ' %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load volatile i32 *%%bstop%d' % (i, i)
+ print ' %%bcur%d = load i32 *%%bstop%d' % (i, i)
print ' %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ''
for i in xrange(branch_blocks):
print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i)
+ print ' %%acur%d = load i32 *%%astop%d' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
print ''
print 'b%d:' % i
print ' store volatile i8 %d, i8 *%%base' % value
print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
- print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i)
+ print ' %%acur%d = load i32 *%%astop%d' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
print ' br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next)
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+ print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
print ' %%btest%d = icmp eq i32 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
for i in xrange(branch_blocks):
print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+ print ' %%acur%d = load i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+ print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
print ' %%btest%d = icmp eq i64 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
for i in xrange(branch_blocks):
print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+ print ' %%acur%d = load i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i)
print ' %%atest%d = icmp eq i64 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%d = load volatile i8 *%%stop' % i
+ print ' %%bcur%d = load i8 *%%stop' % i
print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
print ' %%btest%d = icmp slt i32 %%bext%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%d = load volatile i8 *%%stop' % i
+ print ' %%acur%d = load i8 *%%stop' % i
print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i)
print ' %%atest%d = icmp slt i32 %%aext%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%d = load volatile i8 *%%stop' % i
+ print ' %%bcur%d = load i8 *%%stop' % i
print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
print ' %%btest%d = icmp slt i64 %%bext%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%d = load volatile i8 *%%stop' % i
+ print ' %%acur%d = load i8 *%%stop' % i
print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i)
print ' %%atest%d = icmp slt i64 %%aext%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+ print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
print ' %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
for i in xrange(branch_blocks):
print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+ print ' %%acur%d = load i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i)
print ' %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+ print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
print ' %%btest%d = icmp ult i64 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
for i in xrange(branch_blocks):
print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+ print ' %%acur%d = load i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i)
print ' %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%da = load volatile i32 *%%stopa' % i
- print ' %%bcur%db = load volatile i32 *%%stopb' % i
+ print ' %%bcur%da = load i32 *%%stopa' % i
+ print ' %%bcur%db = load i32 *%%stopb' % i
print ' %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i)
print ' %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%da = load volatile i32 *%%stopa' % i
- print ' %%acur%db = load volatile i32 *%%stopb' % i
+ print ' %%acur%da = load i32 *%%stopa' % i
+ print ' %%acur%db = load i32 *%%stopb' % i
print ' %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i)
print ' %%atest%d = icmp ult i32 %%asub%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%da = load volatile i64 *%%stopa' % i
- print ' %%bcur%db = load volatile i64 *%%stopb' % i
+ print ' %%bcur%da = load i64 *%%stopa' % i
+ print ' %%bcur%db = load i64 *%%stopb' % i
print ' %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i)
print ' %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%da = load volatile i64 *%%stopa' % i
- print ' %%acur%db = load volatile i64 *%%stopb' % i
+ print ' %%acur%da = load i64 *%%stopa' % i
+ print ' %%acur%db = load i64 *%%stopb' % i
print ' %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i)
print ' %%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
; And again with maximum register pressure. The only spill slots that the
; NOFP case needs are the emergency ones, so the offsets are the same as for f2.
-; However, the FP case uses %r11 as the frame pointer and must therefore
-; spill a second register. This leads to an extra displacement of 8.
+; The FP case needs to spill an extra register and is too dependent on
+; register allocation heuristics for a stable test.
define void @f11(i32 *%vptr) {
; CHECK-NOFP-LABEL: f11:
; CHECK-NOFP: stmg %r6, %r15,
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
-;
-; CHECK-FP-LABEL: f11:
-; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
-; CHECK-FP: lay [[REGISTER]], 4096(%r11)
-; CHECK-FP: mvhi 8([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
-; CHECK-FP: lmg %r6, %r15,
-; CHECK-FP: br %r14
%i0 = load volatile i32 *%vptr
%i1 = load volatile i32 *%vptr
%i3 = load volatile i32 *%vptr
; And again with maximum register pressure. The only spill slots that the
; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
-; However, the FP case uses %r11 as the frame pointer and must therefore
-; spill a second register. This leads to an extra displacement of 8.
+; The FP case needs to spill an extra register and is too dependent on
+; register allocation heuristics for a stable test.
define void @f11(i32 *%vptr) {
; CHECK-NOFP-LABEL: f11:
; CHECK-NOFP: stmg %r6, %r15,
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
-;
-; CHECK-FP-LABEL: f11:
-; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
-; CHECK-FP: llilh [[REGISTER]], 8
-; CHECK-FP: agr [[REGISTER]], %r11
-; CHECK-FP: mvi 8([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
-; CHECK-FP: lmg %r6, %r15,
-; CHECK-FP: br %r14
%i0 = load volatile i32 *%vptr
%i1 = load volatile i32 *%vptr
%i3 = load volatile i32 *%vptr
--- /dev/null
+; Test serialization instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN: FileCheck %s -check-prefix=CHECK-FULL
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
+; RUN: FileCheck %s -check-prefix=CHECK-FAST
+
+; Check that volatile loads produce a serialisation.
+define i32 @f1(i32 *%src) {
+; CHECK-FULL-LABEL: f1:
+; CHECK-FULL: bcr 15, %r0
+; CHECK-FULL: l %r2, 0(%r2)
+; CHECK-FULL: br %r14
+;
+; CHECK-FAST-LABEL: f1:
+; CHECK-FAST: bcr 14, %r0
+; CHECK-FAST: l %r2, 0(%r2)
+; CHECK-FAST: br %r14
+ %val = load volatile i32 *%src
+ ret i32 %val
+}
; CHECK: stgrl [[REG]], h8
; CHECK: br %r14
entry:
+ %val8 = load volatile i64 *@h8
%val0 = load volatile i64 *@h0
%val1 = load volatile i64 *@h1
%val2 = load volatile i64 *@h2
%val5 = load volatile i64 *@h5
%val6 = load volatile i64 *@h6
%val7 = load volatile i64 *@h7
- %val8 = load volatile i64 *@h8
%val9 = load volatile i64 *@h9
call void @foo()