--
-Atomic loads and stores use the default compare-and-swap based implementation.
-This is much too conservative in practice, since the architecture guarantees
-that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are
-inherently atomic.
-
---
-
If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG.
--
setOperationAction(ISD::SDIVREM, VT, Custom);
setOperationAction(ISD::UDIVREM, VT, Custom);
- // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
- // FIXME: probably much too conservative.
- setOperationAction(ISD::ATOMIC_LOAD, VT, Expand);
- setOperationAction(ISD::ATOMIC_STORE, VT, Expand);
+ // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
+ // stores, putting a serialization instruction after the stores.
+ setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
// No special instructions for these.
setOperationAction(ISD::CTPOP, VT, Expand);
MVT::i64, HighOp, Low32);
}
+// Op is an atomic load. Lower it into a normal volatile load.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+ return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
+ Node->getChain(), Node->getBasePtr(),
+ Node->getMemoryVT(), Node->getMemOperand());
+}
+
+// Op is an atomic store. Lower it into a normal volatile store followed
+// by a serialization.
+SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+ SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
+ Node->getBasePtr(), Node->getMemoryVT(),
+ Node->getMemOperand());
+ return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
+ Chain), 0);
+}
+
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
-SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
- SelectionDAG &DAG,
- unsigned Opcode) const {
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned Opcode) const {
AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
// 32-bit operations need no code outside the main loop.
case ISD::OR:
return lowerOR(Op, DAG);
case ISD::ATOMIC_SWAP:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
+ case ISD::ATOMIC_STORE:
+ return lowerATOMIC_STORE(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ return lowerATOMIC_LOAD(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
case ISD::ATOMIC_LOAD_AND:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
case ISD::ATOMIC_LOAD_OR:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
case ISD::ATOMIC_LOAD_XOR:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
case ISD::ATOMIC_LOAD_NAND:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
case ISD::ATOMIC_LOAD_MIN:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
case ISD::ATOMIC_LOAD_MAX:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
case ISD::ATOMIC_LOAD_UMIN:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
case ISD::ATOMIC_LOAD_UMAX:
- return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
case ISD::ATOMIC_CMP_SWAP:
return lowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STACKSAVE:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG,
- unsigned Opcode) const;
+ SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
+ unsigned Opcode) const;
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that loads are handled.
-; The CS-based sequence is probably far too conservative.
define i8 @f1(i8 *%src) {
; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lb %r2, 0(%r2)
; CHECK: br %r14
%val = load atomic i8 *%src seq_cst, align 1
ret i8 %val
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that loads are handled.
-; The CS-based sequence is probably far too conservative.
define i16 @f1(i16 *%src) {
; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lh %r2, 0(%r2)
; CHECK: br %r14
%val = load atomic i16 *%src seq_cst, align 2
ret i16 %val
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that loads are handled.
-; Using CS is probably too conservative.
-define i32 @f1(i32 %dummy, i32 *%src) {
+define i32 @f1(i32 *%src) {
; CHECK-LABEL: f1:
-; CHECK: lhi %r2, 0
-; CHECK: cs %r2, %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: l %r2, 0(%r2)
; CHECK: br %r14
%val = load atomic i32 *%src seq_cst, align 4
ret i32 %val
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that loads are handled.
-; Using CSG is probably too conservative.
-define i64 @f1(i64 %dummy, i64 *%src) {
+define i64 @f1(i64 *%src) {
; CHECK-LABEL: f1:
-; CHECK: lghi %r2, 0
-; CHECK: csg %r2, %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lg %r2, 0(%r2)
; CHECK: br %r14
%val = load atomic i64 *%src seq_cst, align 8
ret i64 %val
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that stores are handled.
-; The CS-based sequence is probably far too conservative.
define void @f1(i8 %val, i8 *%src) {
; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: stc %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
; CHECK: br %r14
store atomic i8 %val, i8 *%src seq_cst, align 1
ret void
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that stores are handled.
-; The CS-based sequence is probably far too conservative.
define void @f1(i16 %val, i16 *%src) {
; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: sth %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
; CHECK: br %r14
store atomic i16 %val, i16 *%src seq_cst, align 2
ret void
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that stores are handled.
-; Using CS is probably too conservative.
define void @f1(i32 %val, i32 *%src) {
; CHECK-LABEL: f1:
-; CHECK: l %r0, 0(%r3)
-; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: cs %r0, %r2, 0(%r3)
-; CHECK: jl [[LABEL]]
+; CHECK: st %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
; CHECK: br %r14
store atomic i32 %val, i32 *%src seq_cst, align 4
ret void
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; This is just a placeholder to make sure that stores are handled.
-; Using CS is probably too conservative.
define void @f1(i64 %val, i64 *%src) {
; CHECK-LABEL: f1:
-; CHECK: lg %r0, 0(%r3)
-; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: csg %r0, %r2, 0(%r3)
-; CHECK: jl [[LABEL]]
+; CHECK: stg %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
; CHECK: br %r14
store atomic i64 %val, i64 *%src seq_cst, align 8
ret void
define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK-LABEL: f20:
-; CHECK: cs {{%r[0-9]+}},
-; CHECK: jl
+; CHECK: lb {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
-; CHECK: stc {{%r[0-9]+}},
+; CHECK: stc {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
%orig = load atomic i8 *%ptr unordered, align 1
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lb %r3, 0(%r2)
; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-9]+}},
+; CHECK: stc %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
%orig = load i8 *%ptr
define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK-LABEL: f20:
-; CHECK: cs {{%r[0-9]+}},
-; CHECK: jl
+; CHECK: lh {{%r[0-9]+}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
-; CHECK: sth {{%r[0-9]+}},
+; CHECK: sth {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
%orig = load atomic i16 *%ptr unordered, align 2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lh %r3, 0(%r2)
; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-9]+}},
+; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
%orig = load i16 *%ptr
define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; CHECK-LABEL: f16:
-; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
%orig = load i32 *%ptr
define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CSG.
; CHECK-LABEL: f10:
-; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: lg {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: lg %r3, 0(%r2)
; CHECK: [[LABEL]]:
-; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
%orig = load i64 *%ptr