arm64: atomics: prefetch the destination word for write prior to stxr
author Will Deacon <will.deacon@arm.com>
Fri, 29 May 2015 12:31:10 +0000 (13:31 +0100)
committer Will Deacon <will.deacon@arm.com>
Mon, 27 Jul 2015 14:28:53 +0000 (15:28 +0100)
The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.
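
As a concrete illustration, the sketch below approximates what one of the
patched LL/SC routines expands to (a hand-written example modelled on the
atomic_add() case in the diff; the function name is invented). Without the
prfm, the ldxr typically pulls the cacheline in shared state and the stxr
must then upgrade it to exclusive, a window in which another CPU can steal
the line and force a retry; pstl1strm requests the line for write up front:

static inline void example_atomic_add(int i, int *counter)
{
	unsigned long tmp;
	int result;

	asm volatile("// example_atomic_add\n"
	"	prfm	pstl1strm, %2\n"	/* prefetch for store, to L1, streaming */
	"1:	ldxr	%w0, %2\n"		/* load-exclusive the current value */
	"	add	%w0, %w0, %w3\n"	/* compute the new value */
	"	stxr	%w1, %w0, %2\n"		/* store-exclusive; %w1 is the status */
	"	cbnz	%w1, 1b\n"		/* non-zero status: lost the line, retry */
	: "=&r" (result), "=&r" (tmp), "+Q" (*counter)
	: "Ir" (i));
}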

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/cmpxchg.h
arch/arm64/include/asm/futex.h
arch/arm64/lib/bitops.S

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 5a9fb37272d4cbefc1871603cb3770c9d8de880c..50d6abd3c43997f0c3319f37f1cfd73a16fb4964 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))                               \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "\n"                              \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stxr    %w1, %w0, %2\n"                                         \
@@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))              \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "_return\n"                       \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stlxr   %w1, %w0, %2\n"                                         \
@@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
        int oldval;
 
        asm volatile("// atomic_cmpxchg\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      eor     %w0, %w1, %w3\n"
 "      cbnz    %w0, 2f\n"
@@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))                        \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "\n"                            \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stxr    %w1, %0, %2\n"                                          \
@@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))               \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "_return\n"                     \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stlxr   %w1, %0, %2\n"                                          \
@@ -174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
        unsigned long res;
 
        asm volatile("// atomic64_cmpxchg\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %1, %2\n"
 "      eor     %0, %1, %3\n"
 "      cbnz    %w0, 2f\n"
@@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
        unsigned long tmp;
 
        asm volatile("// atomic64_dec_if_positive\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %0, %2\n"
 "      subs    %0, %0, #1\n"
 "      b.mi    2f\n"
@@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,            \
        unsigned long tmp, oldval;                                      \
                                                                        \
        asm volatile(                                                   \
+       "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxr" #sz "\t%" #w "[oldval], %[v]\n"                   \
        "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
        "       cbnz    %" #w "[tmp], 2f\n"                             \
@@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1,           \
        unsigned long tmp, ret;                                         \
                                                                        \
        asm volatile("// __cmpxchg_double" #name "\n"                   \
+       "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxp    %0, %1, %2\n"                                   \
        "       eor     %0, %0, %3\n"                                   \
        "       eor     %1, %1, %4\n"                                   \
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f70212629d02f99958ad00d9d150045b474370bf..7bfda0944c9bbc55c73e82091ba94d4670f1bbd3 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 1:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxrb   %w0, %2\n"
                "       stlxrb  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpalb  %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
@@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 2:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxrh   %w0, %2\n"
                "       stlxrh  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpalh  %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
@@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 4:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxr    %w0, %2\n"
                "       stlxr   %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpal   %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
@@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 8:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxr    %0, %2\n"
                "       stlxr   %w1, %3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpal   %3, %0, %2\n"
                "       nop\n"
                "       nop")
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 775e85b9d1f25ed64bf877c5e43defb3c839714b..007a69fc4f408d5f2f7e58f4070b6cb354a5e022 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -30,6 +30,7 @@
        asm volatile(                                                   \
        ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,            \
                    CONFIG_ARM64_PAN)                                   \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w1, %2\n"                                              \
        insn "\n"                                                       \
 "2:    stlxr   %w3, %w0, %2\n"                                         \
@@ -120,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                return -EFAULT;
 
        asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      sub     %w3, %w1, %w4\n"
 "      cbnz    %w3, 3f\n"
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
index bc18457c2bba6cfbc67564e71ca625e4d2b3bb9f..43ac736baa5bf13ed61caaa3b6d106ec313ee158 100644
--- a/arch/arm64/lib/bitops.S
+++ b/arch/arm64/lib/bitops.S
@@ -31,6 +31,7 @@ ENTRY(        \name   )
        eor     w0, w0, w3              // Clear low bits
        mov     x2, #1
        add     x1, x1, x0, lsr #3      // Get word offset
+alt_lse "      prfm    pstl1strm, [x1]",       "nop"
        lsl     x3, x2, x3              // Create mask
 
 alt_lse        "1:     ldxr    x2, [x1]",              "\lse   x3, [x1]"
@@ -48,6 +49,7 @@ ENTRY(        \name   )
        eor     w0, w0, w3              // Clear low bits
        mov     x2, #1
        add     x1, x1, x0, lsr #3      // Get word offset
+alt_lse "      prfm    pstl1strm, [x1]",       "nop"
        lsl     x4, x2, x3              // Create mask
 
 alt_lse        "1:     ldxr    x2, [x1]",              "\lse   x4, x2, [x1]"