From 9a9d275dc7897dfba7f41ce1b3770ca27ac149e8 Mon Sep 17 00:00:00 2001
From: Eric Christopher
Date: Thu, 22 Jul 2010 02:48:34 +0000
Subject: [PATCH] Custom lower the memory barrier instructions and add support
 for lowering without sse2. Add a couple of new testcases.

Fixes a few libgomp tests and latent bugs. Remove a few todos.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109078 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/README.txt          |  7 ------
 lib/Target/X86/X86ISelLowering.cpp | 36 ++++++++++++++++++++++++++++--
 lib/Target/X86/X86ISelLowering.h   |  9 +++++++-
 lib/Target/X86/X86InstrInfo.td     | 29 ++++++++++++++++++++++++
 lib/Target/X86/X86InstrSSE.td      | 12 ++++------
 test/CodeGen/X86/barrier-sse.ll    | 21 +++++++++++++++++
 test/CodeGen/X86/barrier.ll        |  7 ++++++
 7 files changed, 103 insertions(+), 18 deletions(-)
 create mode 100644 test/CodeGen/X86/barrier-sse.ll
 create mode 100644 test/CodeGen/X86/barrier.ll

diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index efc0cd82f23..6034a091d14 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1135,13 +1135,6 @@ void test(double *P) {
 
 //===---------------------------------------------------------------------===//
 
-handling llvm.memory.barrier on pre SSE2 cpus
-
-should generate:
-lock ; mov %esp, %esp
-
-//===---------------------------------------------------------------------===//
-
 The generated code on x86 for checking for signed overflow on a multiply the
 obvious way is much longer than it needs to be.
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a67331fe777..634c08e54de 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -343,8 +343,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   if (Subtarget->hasSSE1())
     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
 
-  if (!Subtarget->hasSSE2())
-    setOperationAction(ISD::MEMBARRIER    , MVT::Other, Expand);
+  // We may not have a libcall for MEMBARRIER so we should lower this.
+  setOperationAction(ISD::MEMBARRIER    , MVT::Other, Custom);
+
   // On X86 and X86-64, atomic operations are lowered to locked instructions.
   // Locked instructions, in turn, have implicit fence semantics (all memory
   // operations are flushed before issuing the locked instruction, and they
@@ -7509,6 +7510,36 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   return Sum;
 }
 
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (!Subtarget->hasSSE2())
+    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+                       DAG.getConstant(0, MVT::i32));
+
+  unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+  if(!isDev)
+    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+  else {
+    unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+    unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+    unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+    // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+    if (!Op1 && !Op2 && !Op3 && Op4)
+      return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+    // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+    if (Op1 && !Op2 && !Op3 && !Op4)
+      return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+    // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+    //           (MFENCE)>;
+    return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+  }
+}
+
 SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
   EVT T = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
@@ -7598,6 +7629,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
 SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
+  case ISD::MEMBARRIER:         return LowerMEMBARRIER(Op,DAG);
   case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
   case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4e4daa4bc5c..1809de5344e 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -265,7 +265,13 @@ namespace llvm {
       ATOMXOR64_DAG,
       ATOMAND64_DAG,
       ATOMNAND64_DAG,
-      ATOMSWAP64_DAG
+      ATOMSWAP64_DAG,
+
+      // Memory barrier
+      MEMBARRIER,
+      MFENCE,
+      SFENCE,
+      LFENCE
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop!  In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -715,6 +721,7 @@ namespace llvm {
     SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
 
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 6917a9db648..597356d7b48 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,
                                         SDTCisVT<1, i32>]>;
 
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+                            [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+                            [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+                            [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+                            [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+                            [SDNPHasChain]>;
+
+
 def X86bsf    : SDNode<"X86ISD::BSF",      SDTUnaryArithWithFlags>;
 def X86bsr    : SDNode<"X86ISD::BSR",      SDTUnaryArithWithFlags>;
 def X86shld   : SDNode<"X86ISD::SHLD",     SDTIntShiftDOp>;
@@ -3906,6 +3921,20 @@ def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
 // Atomic support
 //
 
+// Memory barriers
+let hasSideEffects = 1 in {
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+                       "#MEMBARRIER",
+                       [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let Uses = [ESP] in
+def Int_MemBarrierNoSSE : I<0x0B, Pseudo, (outs), (ins GR32:$zero),
+                            "lock\n\t"
+                            "or{l}\t{$zero, (%esp)|(%esp), $zero}",
+                            [(X86MemBarrierNoSSE GR32:$zero)]>, LOCK;
+}
+
 // Atomic swap. These are just normal xchg instructions. But since a memory
 // operand is referenced, the atomicity is ensured.
 let Constraints = "$val = $dst" in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 804d0592322..d8bb435c29c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2001,6 +2001,7 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
 // Load, store, and memory fence
 def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
                "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
 // We set canFoldAsLoad because this can be converted to a constant-pool
@@ -3024,19 +3025,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
                "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
 // Pause. This "instruction" is encoded as "rep; nop", so even though it
 // was introduced with SSE2, it's backward compatible.
 def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
 
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
-                      (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
-                      (i8 1)), (MFENCE)>;
-
 // Alias instructions that map zero vector to pxor / xorp* for sse.
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-ones value if folding it would be beneficial.
 
diff --git a/test/CodeGen/X86/barrier-sse.ll b/test/CodeGen/X86/barrier-sse.ll
new file mode 100644
index 00000000000..6190c3684ed
--- /dev/null
+++ b/test/CodeGen/X86/barrier-sse.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER
+
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+  call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
+  call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
+
+  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
+  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
+  call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
+
+  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
+  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
+  ret void
+}
diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll
new file mode 100644
index 00000000000..fad6ef690c2
--- /dev/null
+++ b/test/CodeGen/X86/barrier.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+  ret void
+}
\ No newline at end of file
-- 
2.34.1
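
A note for reviewers, not part of the patch itself: a rough sketch of what the
pre-SSE2 path is expected to produce for test/CodeGen/X86/barrier.ll. The
Int_MemBarrierNoSSE pseudo or's a register holding zero into (%esp) under a
lock prefix; the zero is still materialized in a register because the constant
is not yet folded into the instruction (see the TODO above). The register
choice and surrounding code below are illustrative assumptions, not guaranteed
output:

  $ llc barrier.ll -march=x86 -mattr=-sse2 -o -
  test:
          xorl    %eax, %eax
          lock
          orl     %eax, (%esp)
          ret

With +sse2, a barrier that is not device-visible lowers to the Int_MemBarrier
pseudo, which prints only the "#MEMBARRIER" marker and no real instruction;
that is why barrier-sse.ll greps for MEMBARRIER instead of a fence.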