From: Torok Edwin Date: Mon, 19 Oct 2009 11:00:58 +0000 (+0000) Subject: Fix PR5247, "lock addq" pattern (and other atomics), it DOES modify EFLAGS. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=6602922878ca84f5adf86dc299b6fb32086a79f8 Fix PR5247, "lock addq" pattern (and other atomics), it DOES modify EFLAGS. LLC was scheduling compares before the adds causing wrong branches to be taken in programs, resulting in misoptimized code wherever atomic adds were used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84485 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index ef19823a283..f7d5ef275fe 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1515,6 +1515,7 @@ def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val) } // Optimized codegen when the non-memory output is not used. +let Defs = [EFLAGS] in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. 
def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" @@ -1544,7 +1545,7 @@ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "lock\n\t" "dec{q}\t$dst", []>, LOCK; - +} // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { diff --git a/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll b/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll new file mode 100644 index 00000000000..d7f0c1afa3b --- /dev/null +++ b/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll @@ -0,0 +1,69 @@ +; RUN: llvm-as <%s | llc | FileCheck %s +; PR 5247 +; check that cmp is not scheduled before the add +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +@.str76843 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1] +@__profiling_callsite_timestamps_live = external global [1216 x i64] ; <[1216 x i64]*> [#uses=2] + +define i32 @cl_init(i32 %initoptions) nounwind { +entry: + %retval.i = alloca i32 ; [#uses=3] + %retval = alloca i32 ; [#uses=2] + %initoptions.addr = alloca i32 ; [#uses=2] + tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %0 = tail call i64 @llvm.readcyclecounter() nounwind ; [#uses=1] + store i32 %initoptions, i32* %initoptions.addr + %1 = bitcast i32* %initoptions.addr to { }* ; <{ }*> [#uses=0] + call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %2 = call i64 @llvm.readcyclecounter() nounwind ; [#uses=1] + %call.i = call i32 @lt_dlinit() nounwind ; [#uses=1] + %tobool.i = icmp ne i32 %call.i, 0 ; [#uses=1] + br i1 %tobool.i, label %if.then.i, label %if.end.i + +if.then.i: ; preds = %entry + %call1.i = call i32 @warn_dlerror(i8* 
getelementptr inbounds ([45 x i8]* @.str76843, i32 0, i32 0)) nounwind ; [#uses=0] + store i32 -1, i32* %retval.i + br label %lt_init.exit + +if.end.i: ; preds = %entry + store i32 0, i32* %retval.i + br label %lt_init.exit + +lt_init.exit: ; preds = %if.end.i, %if.then.i + %3 = load i32* %retval.i ; [#uses=1] + call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %4 = call i64 @llvm.readcyclecounter() nounwind ; [#uses=1] + %5 = sub i64 %4, %2 ; [#uses=1] + %6 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 51), i64 %5) nounwind ; [#uses=0] +;CHECK: lock +;CHECK-NEXT: {{xadd|addq}} %rdx, __profiling_callsite_timestamps_live +;CHECK-NEXT: cmpl $0, +;CHECK-NEXT: jne + %cmp = icmp eq i32 %3, 0 ; [#uses=1] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %lt_init.exit + call void @cli_rarload() + br label %if.end + +if.end: ; preds = %if.then, %lt_init.exit + store i32 0, i32* %retval + %7 = load i32* %retval ; [#uses=1] + tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind + %8 = tail call i64 @llvm.readcyclecounter() nounwind ; [#uses=1] + %9 = sub i64 %8, %0 ; [#uses=1] + %10 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 50), i64 %9) ; [#uses=0] + ret i32 %7 +} + +declare void @cli_rarload() nounwind + +declare i32 @lt_dlinit() + +declare i32 @warn_dlerror(i8*) nounwind + +declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind + +declare i64 @llvm.readcyclecounter() nounwind