From: Jonathan Roelofs Date: Thu, 21 Aug 2014 14:35:47 +0000 (+0000) Subject: Add a thread-model knob for lowering atomics on baremetal & single threaded systems X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=4c3be1aa0f155cddb37104cd9d035c05a262d01d Add a thread-model knob for lowering atomics on baremetal & single threaded systems http://reviews.llvm.org/D4984 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216182 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index 449d93418a4..28069c3f40a 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -54,6 +54,16 @@ RelocModel("relocation-model", "Relocatable external references, non-relocatable code"), clEnumValEnd)); +cl::opt<ThreadModel::Model> +TMModel("thread-model", + cl::desc("Choose threading model"), + cl::init(ThreadModel::POSIX), + cl::values(clEnumValN(ThreadModel::POSIX, "posix", + "POSIX thread model"), + clEnumValN(ThreadModel::Single, "single", + "Single thread model"), + clEnumValEnd)); + cl::opt CMModel("code-model", cl::desc("Choose code model"), @@ -245,6 +255,8 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.MCOptions = InitMCTargetOptionsFromFlags(); Options.JTType = JTableType; + Options.ThreadModel = TMModel; + return Options; } diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 922fae54bb8..338291465b6 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -50,6 +50,13 @@ namespace llvm { }; } + namespace ThreadModel { + enum Model { + POSIX, // POSIX Threads + Single // Single Threaded Environment + }; + } + class TargetOptions { public: TargetOptions() @@ -220,6 +227,10 @@ namespace llvm { /// create for functions that have the jumptable attribute. 
JumpTable::JumpTableType JTType; + /// ThreadModel - This flag specifies the type of threading model to assume + /// for things like atomics + ThreadModel::Model ThreadModel; + /// Machine level options. MCTargetOptions MCOptions; }; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index faf12fefffd..42c8f202e0f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -781,8 +781,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // the default expansion. If we are targeting a single threaded system, + // then set them all for expand so we can lower them later into their + // non-atomic form. + if (TM.Options.ThreadModel == ThreadModel::Single) + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index d85194b75ec..fc164ada35f 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -158,7 +158,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - addPass(createAtomicExpandLoadLinkedPass(TM)); + if (TM->Options.ThreadModel == ThreadModel::Single) + addPass(createLowerAtomicPass()); + else + addPass(createAtomicExpandLoadLinkedPass(TM)); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. 
We can exploit existing control-flow in diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 5d623860582..c0171a8da12 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s ; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-M0 +; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL define void @func(i32 %argc, i8** %argv) nounwind { entry: @@ -28,6 +29,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_add_4 ; CHECK-M0: bl ___sync_fetch_and_add_4 + ; CHECK-BAREMETAL: add + ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw add i32* %val1, i32 %tmp monotonic store i32 %0, i32* %old ; CHECK: ldrex @@ -35,6 +38,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_sub_4 ; CHECK-M0: bl ___sync_fetch_and_sub_4 + ; CHECK-BAREMETAL: sub + ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw sub i32* %val2, i32 30 monotonic store i32 %1, i32* %old ; CHECK: ldrex @@ -42,6 +47,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_add_4 ; CHECK-M0: bl ___sync_fetch_and_add_4 + ; CHECK-BAREMETAL: add + ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw add i32* %val2, i32 1 monotonic store i32 %2, i32* %old ; CHECK: ldrex @@ -49,6 +56,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_sub_4 ; CHECK-M0: bl ___sync_fetch_and_sub_4 + ; CHECK-BAREMETAL: sub + ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw sub i32* %val2, i32 1 monotonic store i32 %3, i32* %old ; CHECK: ldrex @@ -56,6 +65,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_and_4 ; CHECK-M0: bl ___sync_fetch_and_and_4 + ; CHECK-BAREMETAL: and + ; CHECK-BAREMETAL-NOT: __sync %4 = atomicrmw and i32* %andt, 
i32 4080 monotonic store i32 %4, i32* %old ; CHECK: ldrex @@ -63,6 +74,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_or_4 ; CHECK-M0: bl ___sync_fetch_and_or_4 + ; CHECK-BAREMETAL: or + ; CHECK-BAREMETAL-NOT: __sync %5 = atomicrmw or i32* %ort, i32 4080 monotonic store i32 %5, i32* %old ; CHECK: ldrex @@ -70,6 +83,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_xor_4 ; CHECK-M0: bl ___sync_fetch_and_xor_4 + ; CHECK-BAREMETAL: eor + ; CHECK-BAREMETAL-NOT: __sync %6 = atomicrmw xor i32* %xort, i32 4080 monotonic store i32 %6, i32* %old ; CHECK: ldrex @@ -77,6 +92,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_min_4 ; CHECK-M0: bl ___sync_fetch_and_min_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old %neg = sub i32 0, 1 @@ -85,6 +102,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_min_4 ; CHECK-M0: bl ___sync_fetch_and_min_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %8 = atomicrmw min i32* %val2, i32 %neg monotonic store i32 %8, i32* %old ; CHECK: ldrex @@ -92,6 +111,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_max_4 ; CHECK-M0: bl ___sync_fetch_and_max_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %9 = atomicrmw max i32* %val2, i32 1 monotonic store i32 %9, i32* %old ; CHECK: ldrex @@ -99,6 +120,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_max_4 ; CHECK-M0: bl ___sync_fetch_and_max_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old ; CHECK: ldrex @@ -106,6 +129,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_4 ; CHECK-M0: bl ___sync_fetch_and_umin_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %11 = atomicrmw umin i32* %val2, i32 16 monotonic store i32 %11, i32* %old %uneg = sub i32 0, 1 @@ -114,6 +139,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx 
___sync_fetch_and_umin_4 ; CHECK-M0: bl ___sync_fetch_and_umin_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic store i32 %12, i32* %old ; CHECK: ldrex @@ -121,6 +148,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_4 ; CHECK-M0: bl ___sync_fetch_and_umax_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %13 = atomicrmw umax i32* %val2, i32 1 monotonic store i32 %13, i32* %old ; CHECK: ldrex @@ -128,6 +157,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_4 ; CHECK-M0: bl ___sync_fetch_and_umax_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %14 = atomicrmw umax i32* %val2, i32 0 monotonic store i32 %14, i32* %old @@ -144,6 +175,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_2 ; CHECK-M0: bl ___sync_fetch_and_umin_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i16* %val, i16 16 monotonic store i16 %0, i16* %old %uneg = sub i16 0, 1 @@ -152,6 +185,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_2 ; CHECK-M0: bl ___sync_fetch_and_umin_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw umin i16* %val, i16 %uneg monotonic store i16 %1, i16* %old ; CHECK: ldrex @@ -159,6 +194,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_2 ; CHECK-M0: bl ___sync_fetch_and_umax_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i16* %val, i16 1 monotonic store i16 %2, i16* %old ; CHECK: ldrex @@ -166,6 +203,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_2 ; CHECK-M0: bl ___sync_fetch_and_umax_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i16* %val, i16 0 monotonic store i16 %3, i16* %old ret void @@ -181,6 +220,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_1 ; CHECK-M0: bl ___sync_fetch_and_umin_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %0 = 
atomicrmw umin i8* %val, i8 16 monotonic store i8 %0, i8* %old ; CHECK: ldrex @@ -188,6 +229,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_1 ; CHECK-M0: bl ___sync_fetch_and_umin_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %uneg = sub i8 0, 1 %1 = atomicrmw umin i8* %val, i8 %uneg monotonic store i8 %1, i8* %old @@ -196,6 +239,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_1 ; CHECK-M0: bl ___sync_fetch_and_umax_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i8* %val, i8 1 monotonic store i8 %2, i8* %old ; CHECK: ldrex @@ -203,6 +248,8 @@ entry: ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_1 ; CHECK-M0: bl ___sync_fetch_and_umax_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i8* %val, i8 0 monotonic store i8 %3, i8* %old ret void @@ -256,3 +303,69 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) { ret i32 %oldval } + +define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind { +; CHECK-LABEL: load_load_add_acquire + %val1 = load atomic i32* %mem1 acquire, align 4 + %val2 = load atomic i32* %mem2 acquire, align 4 + %tmp = add i32 %val1, %val2 + +; CHECK: ldr {{r[0-9]}}, [r0] +; CHECK: dmb +; CHECK: ldr {{r[0-9]}}, [r1] +; CHECK: dmb +; CHECK: add r0, + +; CHECK-M0: ___sync_val_compare_and_swap_4 +; CHECK-M0: ___sync_val_compare_and_swap_4 + +; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0] +; CHECK-BAREMETAL-NOT: dmb +; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r1] +; CHECK-BAREMETAL-NOT: dmb +; CHECK-BAREMETAL: add r0, + + ret i32 %tmp +} + +define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) { +; CHECK-LABEL: store_store_release + store atomic i32 %val1, i32* %mem1 release, align 4 + store atomic i32 %val2, i32* %mem2 release, align 4 + +; CHECK: dmb +; CHECK: str r1, [r0] +; CHECK: dmb +; CHECK: str r3, [r2] + +; CHECK-M0: ___sync_lock_test_and_set +; CHECK-M0: ___sync_lock_test_and_set + +; 
CHECK-BAREMETAL-NOT: dmb +; CHECK-BAREMETAL: str r1, [r0] +; CHECK-BAREMETAL-NOT: dmb +; CHECK-BAREMETAL: str r3, [r2] + + ret void +} + +define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) { +; CHECK-LABEL: load_fence_store_monotonic + %val = load atomic i32* %mem1 monotonic, align 4 + fence seq_cst + store atomic i32 %val, i32* %mem2 monotonic, align 4 + +; CHECK: ldr [[R0:r[0-9]]], [r0] +; CHECK: dmb +; CHECK: str [[R0]], [r1] + +; CHECK-M0: ldr [[R0:r[0-9]]], [r0] +; CHECK-M0: dmb +; CHECK-M0: str [[R0]], [r1] + +; CHECK-BAREMETAL: ldr [[R0:r[0-9]]], [r0] +; CHECK-BAREMETAL-NOT: dmb +; CHECK-BAREMETAL: str [[R0]], [r1] + + ret void +}