From 4e694c96f1c0c2d09a287ff69bab5896e04dd3fd Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Tue, 31 May 2011 02:54:07 +0000
Subject: [PATCH] This patch implements atomic intrinsics atomic.load.add
 (sub, and, or, xor, nand), atomic.swap and atomic.cmp.swap, all in i8, i16
 and i32 versions. The intrinsics are implemented by creating
 pseudo-instructions, which are then expanded in the method
 MipsTargetLowering::EmitInstrWithCustomInserter.

Patch by Sasa Stankovic.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132323 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsISelLowering.cpp  | 532 +++++++++++++++++++++++++-
 lib/Target/Mips/MipsISelLowering.h    |  10 +
 lib/Target/Mips/MipsInstrInfo.td      | 117 ++++++
 lib/Target/Mips/MipsMachineFunction.h |  11 +-
 test/CodeGen/Mips/atomic.ll           | 253 ++++++++++++
 5 files changed, 917 insertions(+), 6 deletions(-)
 create mode 100644 test/CodeGen/Mips/atomic.ll

diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 1ee51722da0..a94e6d0ff81 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -557,11 +557,6 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
 MachineBasicBlock *
 MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
-  // There is no need to expand CMov instructions if target has
-  // conditional moves.
-  if (Subtarget->hasCondMov())
-    return BB;
-
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   bool isFPCmp = false;
   DebugLoc dl = MI->getDebugLoc();
@@ -569,6 +564,63 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
+
+  case Mips::ATOMIC_LOAD_ADD_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+  case Mips::ATOMIC_LOAD_ADD_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+  case Mips::ATOMIC_LOAD_ADD_I32:
+    return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+
+  case Mips::ATOMIC_LOAD_AND_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+  case Mips::ATOMIC_LOAD_AND_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+  case Mips::ATOMIC_LOAD_AND_I32:
+    return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+
+  case Mips::ATOMIC_LOAD_OR_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+  case Mips::ATOMIC_LOAD_OR_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+  case Mips::ATOMIC_LOAD_OR_I32:
+    return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+
+  case Mips::ATOMIC_LOAD_XOR_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+  case Mips::ATOMIC_LOAD_XOR_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+  case Mips::ATOMIC_LOAD_XOR_I32:
+    return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+
+  case Mips::ATOMIC_LOAD_NAND_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+  case Mips::ATOMIC_LOAD_NAND_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+  case Mips::ATOMIC_LOAD_NAND_I32:
+    return EmitAtomicBinary(MI, BB, 4, 0, true);
+
+  case Mips::ATOMIC_LOAD_SUB_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+  case Mips::ATOMIC_LOAD_SUB_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+  case Mips::ATOMIC_LOAD_SUB_I32:
+    return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+
+  case Mips::ATOMIC_SWAP_I8:
+    return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+  case Mips::ATOMIC_SWAP_I16:
+    return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+  case Mips::ATOMIC_SWAP_I32:
+    return EmitAtomicBinary(MI, BB, 4, 0);
+
+  case Mips::ATOMIC_CMP_SWAP_I8:
+    return EmitAtomicCmpSwapPartword(MI, BB, 1);
+  case Mips::ATOMIC_CMP_SWAP_I16:
+    return EmitAtomicCmpSwapPartword(MI, BB, 2);
+  case Mips::ATOMIC_CMP_SWAP_I32:
+    return EmitAtomicCmpSwap(MI, BB, 4);
+
   case Mips::MOVT:
   case Mips::MOVT_S:
   case Mips::MOVT_D:
@@ -593,6 +645,11 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     break;
   }
 
+  // There is no need to expand CMov instructions if target has
+  // conditional moves.
+  if (Subtarget->hasCondMov())
+    return BB;
+
   // To "insert" a SELECT_CC instruction, we actually have to insert the
   // diamond control-flow pattern.  The incoming instruction knows the
   // destination vreg to set, the condition code register to branch on, the
@@ -660,6 +717,471 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   return BB;
 }
 
+// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
+// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+                unsigned Size, unsigned BinOpcode, bool Nand) const {
+  assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Ptr = MI->getOperand(1).getReg();
+  unsigned Incr = MI->getOperand(2).getReg();
+
+  unsigned Oldval = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+
+  // Insert new blocks after the current block.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = BB;
+  ++It;
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // thisMBB:
+  //   ...
+  //   sw incr, fi(sp)          // store incr to stack (when BinOpcode == 0)
+  //   fallthrough --> loopMBB
+
+  // Note: for atomic.swap (when BinOpcode == 0), storing incr to the stack
+  // before the loop and reloading it inside loopMBB is necessary to prevent
+  // the MachineLICM pass from hoisting the "or" instruction out of loopMBB.
+
+  int fi;
+  if (BinOpcode == 0 && !Nand) {
+    // Get or create a temporary stack location.
+    MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+    fi = MipsFI->getAtomicFrameIndex();
+    if (fi == -1) {
+      fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+      MipsFI->setAtomicFrameIndex(fi);
+    }
+
+    BuildMI(BB, dl, TII->get(Mips::SW))
+      .addReg(Incr).addImm(0).addFrameIndex(fi);
+  }
+  BB->addSuccessor(loopMBB);
+
+  // loopMBB:
+  //   ll      oldval, 0(ptr)
+  //   or      dest, $0, oldval
+  //   <binop> tmp1, oldval, incr
+  //   sc      tmp1, 0(ptr)
+  //   beq     tmp1, $0, loopMBB
+  BB = loopMBB;
+  BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
+  if (Nand) {
+    //  and tmp2, oldval, incr
+    //  nor tmp1, $0, tmp2
+    BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr);
+    BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+  } else if (BinOpcode) {
+    //  <binop> tmp1, oldval, incr
+    BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
+  } else {
+    //  lw tmp2, fi(sp)              // load incr from stack
+    //  or tmp1, $zero, tmp2
+    BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);
+    BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+  }
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::BEQ))
+    .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+                MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+                bool Nand) const {
+  assert((Size == 1 || Size == 2) &&
+         "Unsupported size for EmitAtomicBinaryPartword.");
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Ptr = MI->getOperand(1).getReg();
+  unsigned Incr = MI->getOperand(2).getReg();
+
+  unsigned Addr = RegInfo.createVirtualRegister(RC);
+  unsigned Shift = RegInfo.createVirtualRegister(RC);
+  unsigned Mask = RegInfo.createVirtualRegister(RC);
+  unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+  unsigned Newval = RegInfo.createVirtualRegister(RC);
+  unsigned Oldval = RegInfo.createVirtualRegister(RC);
+  unsigned Incr2 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp10 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp11 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp12 = RegInfo.createVirtualRegister(RC);
+
+  // Insert new blocks after the current block.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = BB;
+  ++It;
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // thisMBB:
+  //   addiu   tmp1,$0,-4           # 0xfffffffc
+  //   and     addr,ptr,tmp1
+  //   andi    tmp2,ptr,3
+  //   sll     shift,tmp2,3
+  //   ori     tmp3,$0,255          # 0xff
+  //   sll     mask,tmp3,shift
+  //   nor     mask2,$0,mask
+  //   andi    tmp4,incr,255
+  //   sll     incr2,tmp4,shift
+  //   sw      incr2, fi(sp)        // store incr2 to stack (when BinOpcode == 0)
+
+  // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to the stack
+  // before the loop and reloading it inside loopMBB is necessary to prevent
+  // the MachineLICM pass from hoisting the "or" instruction out of loopMBB.
+
+  int64_t MaskImm = (Size == 1) ? 255 : 65535;
+  BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
+  BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
+  BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
+  BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
+  BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
+  BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+  BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+  if (BinOpcode != Mips::SUBu) {
+    BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm);
+    BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift);
+  } else {
+    // Subtraction is performed by adding the negated increment.
+    BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr);
+    BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm);
+    BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
+  }
+  int fi;
+  if (BinOpcode == 0 && !Nand) {
+    // Get or create a temporary stack location.
+    MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+    fi = MipsFI->getAtomicFrameIndex();
+    if (fi == -1) {
+      fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+      MipsFI->setAtomicFrameIndex(fi);
+    }
+
+    BuildMI(BB, dl, TII->get(Mips::SW))
+      .addReg(Incr2).addImm(0).addFrameIndex(fi);
+  }
+  BB->addSuccessor(loopMBB);
+
+  // loopMBB:
+  //   ll      oldval,0(addr)
+  //   binop   tmp7,oldval,incr2
+  //   and     newval,tmp7,mask
+  //   and     tmp8,oldval,mask2
+  //   or      tmp9,tmp8,newval
+  //   sc      tmp9,0(addr)
+  //   beq     tmp9,$0,loopMBB
+  BB = loopMBB;
+  BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Addr);
+  if (Nand) {
+    //  and tmp6, oldval, incr2
+    //  nor tmp7, $0, tmp6
+    BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2);
+    BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+  } else if (BinOpcode == Mips::SUBu) {
+    //  addu tmp7, oldval, incr2
+    BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2);
+  } else if (BinOpcode) {
+    //  <binop> tmp7, oldval, incr2
+    BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
+  } else {
+    //  lw tmp6, fi(sp)              // load incr2 from stack
+    //  or tmp7, $zero, tmp6
+    BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addImm(0).addFrameIndex(fi);
+    BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
+  }
+  BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
+  BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
+  BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addImm(0).addReg(Addr);
+  BuildMI(BB, dl, TII->get(Mips::BEQ))
+    .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  // exitMBB:
+  //   and     tmp10,oldval,mask
+  //   srl     tmp11,tmp10,shift
+  //   sll     tmp12,tmp11,24
+  //   sra     dest,tmp12,24
+  BB = exitMBB;
+  int64_t ShiftImm = (Size == 1) ? 24 : 16;
+  // Built in reverse order, since each instruction is inserted at the
+  // beginning of exitMBB.
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
+    .addReg(Tmp12).addImm(ShiftImm);
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12)
+    .addReg(Tmp11).addImm(ShiftImm);
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11)
+    .addReg(Tmp10).addReg(Shift);
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10)
+    .addReg(Oldval).addReg(Mask);
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+                                      MachineBasicBlock *BB,
+                                      unsigned Size) const {
+  assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Ptr = MI->getOperand(1).getReg();
+  unsigned Oldval = MI->getOperand(2).getReg();
+  unsigned Newval = MI->getOperand(3).getReg();
+
+  unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+
+  // Insert new blocks after the current block.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = BB;
+  ++It;
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Get or create a temporary stack location.
+  MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+  int fi = MipsFI->getAtomicFrameIndex();
+  if (fi == -1) {
+    fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
+    MipsFI->setAtomicFrameIndex(fi);
+  }
+
+  // thisMBB:
+  //   ...
+  //   sw newval, fi(sp)            // store newval to stack
+  //   fallthrough --> loop1MBB
+
+  // Note: storing newval to the stack before the loop and reloading it
+  // inside loop2MBB is necessary to prevent the MachineLICM pass from
+  // hoisting the "or" instruction out of loop2MBB.
+
+  BuildMI(BB, dl, TII->get(Mips::SW))
+    .addReg(Newval).addImm(0).addFrameIndex(fi);
+  BB->addSuccessor(loop1MBB);
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   bne dest, oldval, exitMBB
+  BB = loop1MBB;
+  BuildMI(BB, dl, TII->get(Mips::LL), Dest).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::BNE))
+    .addReg(Dest).addReg(Oldval).addMBB(exitMBB);
+  BB->addSuccessor(exitMBB);
+  BB->addSuccessor(loop2MBB);
+
+  // loop2MBB:
+  //   lw tmp2, fi(sp)              // load newval from stack
+  //   or tmp1, $0, tmp2
+  //   sc tmp1, 0(ptr)
+  //   beq tmp1, $0, loop1MBB
+  BB = loop2MBB;
+  BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);
+  BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::BEQ))
+    .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
+  BB->addSuccessor(loop1MBB);
+  BB->addSuccessor(exitMBB);
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+                                              MachineBasicBlock *BB,
+                                              unsigned Size) const {
+  assert((Size == 1 || Size == 2) &&
+         "Unsupported size for EmitAtomicCmpSwapPartword.");
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Ptr = MI->getOperand(1).getReg();
+  unsigned Oldval = MI->getOperand(2).getReg();
+  unsigned Newval = MI->getOperand(3).getReg();
+
+  unsigned Addr = RegInfo.createVirtualRegister(RC);
+  unsigned Shift = RegInfo.createVirtualRegister(RC);
+  unsigned Mask = RegInfo.createVirtualRegister(RC);
+  unsigned Mask2 = RegInfo.createVirtualRegister(RC);
+  unsigned Oldval2 = RegInfo.createVirtualRegister(RC);
+  unsigned Oldval3 = RegInfo.createVirtualRegister(RC);
+  unsigned Oldval4 = RegInfo.createVirtualRegister(RC);
+  unsigned Newval2 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
+  unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
+
+  // Insert new blocks after the current block.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = BB;
+  ++It;
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // thisMBB:
+  //   addiu   tmp1,$0,-4           # 0xfffffffc
+  //   and     addr,ptr,tmp1
+  //   andi    tmp2,ptr,3
+  //   sll     shift,tmp2,3
+  //   ori     tmp3,$0,255          # 0xff
+  //   sll     mask,tmp3,shift
+  //   nor     mask2,$0,mask
+  //   andi    tmp4,oldval,255
+  //   sll     oldval2,tmp4,shift
+  //   andi    tmp5,newval,255
+  //   sll     newval2,tmp5,shift
+  int64_t MaskImm = (Size == 1) ? 255 : 65535;
+  BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
+  BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
+  BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
+  BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
+  BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
+  BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
+  BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+  BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm);
+  BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift);
+  BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm);
+  BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift);
+  BB->addSuccessor(loop1MBB);
+
+  // loop1MBB:
+  //   ll      oldval3,0(addr)
+  //   and     oldval4,oldval3,mask
+  //   bne     oldval4,oldval2,exitMBB
+  BB = loop1MBB;
+  BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addImm(0).addReg(Addr);
+  BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
+  BuildMI(BB, dl, TII->get(Mips::BNE))
+    .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
+  BB->addSuccessor(exitMBB);
+  BB->addSuccessor(loop2MBB);
+
+  // loop2MBB:
+  //   and     tmp6,oldval3,mask2
+  //   or      tmp7,tmp6,newval2
+  //   sc      tmp7,0(addr)
+  //   beq     tmp7,$0,loop1MBB
+  BB = loop2MBB;
+  BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
+  BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
+    .addReg(Tmp7).addImm(0).addReg(Addr);
+  BuildMI(BB, dl, TII->get(Mips::BEQ))
+    .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
+  BB->addSuccessor(loop1MBB);
+  BB->addSuccessor(exitMBB);
+
+  // exitMBB:
+  //   srl     tmp8,oldval4,shift
+  //   sll     tmp9,tmp8,24
+  //   sra     dest,tmp9,24
+  BB = exitMBB;
+  int64_t ShiftImm = (Size == 1) ? 24 : 16;
+  // Built in reverse order, since each instruction is inserted at the
+  // beginning of exitMBB.
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
+    .addReg(Tmp9).addImm(ShiftImm);
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9)
+    .addReg(Tmp8).addImm(ShiftImm);
+  BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8)
+    .addReg(Oldval4).addReg(Shift);
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
 //===----------------------------------------------------------------------===//
 //  Misc Lower Operation implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 24dc157832c..7c6b6e7596d 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -176,6 +176,16 @@ namespace llvm {
     /// specified FP immediate natively. If false, the legalizer will
     /// materialize the FP immediate as a load from a constant pool.
     virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+                    unsigned Size, unsigned BinOpcode, bool Nand = false) const;
+    MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+                    MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+                    bool Nand = false) const;
+    MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+                    MachineBasicBlock *BB, unsigned Size) const;
+    MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+                    MachineBasicBlock *BB, unsigned Size) const;
   };
 }
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index b9b81f7fc37..b32ae6faf86 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -405,6 +405,115 @@ def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
 def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
 def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc\n", []>;
 
+let usesCustomInserter = 1 in {
+  def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_add_8\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_ADD_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_add_16\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_ADD_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_add_32\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+  def ATOMIC_LOAD_SUB_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_sub_8\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_SUB_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_sub_16\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_SUB_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_sub_32\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+  def ATOMIC_LOAD_AND_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_and_8\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_AND_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_and_16\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_AND_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_and_32\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+  def ATOMIC_LOAD_OR_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_or_8\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_OR_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_or_16\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_OR_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_or_32\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+  def ATOMIC_LOAD_XOR_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_xor_8\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_XOR_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_xor_16\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_XOR_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_xor_32\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+  def ATOMIC_LOAD_NAND_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_nand_8\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_NAND_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_nand_16\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>;
+  def ATOMIC_LOAD_NAND_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+    "atomic_load_nand_32\t$dst, $ptr, $incr",
+    [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>;
+
+  def ATOMIC_SWAP_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+    "atomic_swap_8\t$dst, $ptr, $val",
+    [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>;
+  def ATOMIC_SWAP_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+    "atomic_swap_16\t$dst, $ptr, $val",
+    [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>;
+  def ATOMIC_SWAP_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
+    "atomic_swap_32\t$dst, $ptr, $val",
+    [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>;
+
+  def ATOMIC_CMP_SWAP_I8 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+    "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval",
+    [(set CPURegs:$dst,
+      (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+  def ATOMIC_CMP_SWAP_I16 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+    "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval",
+    [(set CPURegs:$dst,
+      (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+  def ATOMIC_CMP_SWAP_I32 : MipsPseudo<
+    (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
+    "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval",
+    [(set CPURegs:$dst,
+      (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction definition
 //===----------------------------------------------------------------------===//
@@ -459,6 +568,14 @@ def SB      : StoreM<0x28, "sb", truncstorei8>;
 def SH      : StoreM<0x29, "sh", truncstorei16>;
 def SW      : StoreM<0x2b, "sw", store>;
 
+/// Load-linked, Store-conditional
+let hasDelaySlot = 1 in
+  def LL    : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
+                 "ll\t$dst, $addr", [], IILoad>;
+let Constraints = "$src = $dst" in
+  def SC    : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
+                 "sc\t$src, $addr", [], IIStore>;
+
 /// Jump and Branch Instructions
 def J       : JumpFJ<0x02, "j">;
 def JR      : JumpFR<0x00, 0x08, "jr">;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 0a485ec5e54..df40e6c748a 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -48,11 +48,17 @@ private:
   std::pair<int, int> InArgFIRange, OutArgFIRange;
   int GPFI; // Index of the frame object for restoring $gp
   unsigned MaxCallFrameSize;
+
+  /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
+  /// intrinsics, it is necessary to use a temporary stack location.
+  /// This field holds the frame index of this location.
+  int AtomicFrameIndex;
 public:
   MipsFunctionInfo(MachineFunction& MF)
   : SRetReturnReg(0), GlobalBaseReg(0),
     VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
-    OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0)
+    OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0),
+    AtomicFrameIndex(-1)
   {}
 
   bool isInArgFI(int FI) const {
@@ -86,6 +92,9 @@ public:
 
   unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
   void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+
+  int getAtomicFrameIndex() const { return AtomicFrameIndex; }
+  void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
 };
 
 } // end of namespace llvm
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
new file mode 100644
index 00000000000..2d5555bd2cb
--- /dev/null
+++ b/test/CodeGen/Mips/atomic.ll
@@ -0,0 +1,253 @@
+; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
+
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.swap.i8.p0i8(i8* nocapture, i8) nounwind
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
+
+
+@x = common global i32 0, align 4
+
+define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* @x, i32 %incr)
+  ret i32 %0
+
+; CHECK: AtomicLoadAdd32:
+; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK: or $2, $zero, $[[R1]]
+; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK: sc $[[R2]], 0($[[R0]])
+; CHECK: beq $[[R2]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicLoadNand32(i32 %incr) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* @x, i32 %incr)
+  ret i32 %0
+
+; CHECK: AtomicLoadNand32:
+; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK: or $2, $zero, $[[R1]]
+; CHECK: and $[[R1]], $[[R1]], $4
+; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R1]]
+; CHECK: sc $[[R2]], 0($[[R0]])
+; CHECK: beq $[[R2]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicSwap32(i32 %oldval) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %oldval)
+  ret i32 %0
+
+; CHECK: AtomicSwap32:
+; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: sw $4, [[OFFSET:[0-9]+]]($sp)
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK: or $2, $zero, $[[R1]]
+; CHECK: lw $[[R2:[0-9]+]], [[OFFSET]]($sp)
+; CHECK: or $[[R3:[0-9]+]], $zero, $[[R2]]
+; CHECK: sc $[[R3]], 0($[[R0]])
+; CHECK: beq $[[R3]], $zero, $[[BB0]]
+}
+
+define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
+entry:
+  %0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %newval)
+  ret i32 %0
+
+; CHECK: AtomicCmpSwap32:
+; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: sw $5, [[OFFSET:[0-9]+]]($sp)
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $2, 0($[[R0]])
+; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]]
+; CHECK: lw $[[R1:[0-9]+]], [[OFFSET]]($sp)
+; CHECK: or $[[R2:[0-9]+]], $zero, $[[R1]]
+; CHECK: sc $[[R2]], 0($[[R0]])
+; CHECK: beq $[[R2]], $zero, $[[BB0]]
+; CHECK: $[[BB1]]:
+}
+
+
+
+@y = common global i8 0, align 1
+
+define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @y, i8 %incr)
+  ret i8 %0
+
+; CHECK: AtomicLoadAdd8:
+; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
+; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK: ori $[[R5:[0-9]+]], $zero, 255
+; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK: andi $[[R8:[0-9]+]], $4, 255
+; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK: addu $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK: sc $[[R14]], 0($[[R2]])
+; CHECK: beq $[[R14]], $zero, $[[BB0]]
+
+; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK: sra $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @y, i8 %incr)
+  ret i8 %0
+
+; CHECK: AtomicLoadSub8:
+; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
+; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK: ori $[[R5:[0-9]+]], $zero, 255
+; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK: subu $[[R18:[0-9]+]], $zero, $4
+; CHECK: andi $[[R8:[0-9]+]], $[[R18]], 255
+; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK: addu $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK: sc $[[R14]], 0($[[R2]])
+; CHECK: beq $[[R14]], $zero, $[[BB0]]
+
+; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK: sra $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @y, i8 %incr)
+  ret i8 %0
+
+; CHECK: AtomicLoadNand8:
+; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
+; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK: ori $[[R5:[0-9]+]], $zero, 255
+; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK: andi $[[R8:[0-9]+]], $4, 255
+; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK: and $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK: nor $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK: sc $[[R14]], 0($[[R2]])
+; CHECK: beq $[[R14]], $zero, $[[BB0]]
+
+; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK: sra $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicSwap8(i8 signext %oldval) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %oldval)
+  ret i8 %0
+
+; CHECK: AtomicSwap8:
+; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
+; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK: ori $[[R5:[0-9]+]], $zero, 255
+; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK: andi $[[R8:[0-9]+]], $4, 255
+; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK: sw $[[R9]], [[OFFSET:[0-9]+]]($sp)
+
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK: lw $[[R18:[0-9]+]], [[OFFSET]]($sp)
+; CHECK: or $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK: sc $[[R14]], 0($[[R2]])
+; CHECK: beq $[[R14]], $zero, $[[BB0]]
+
+; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK: sra $2, $[[R17]], 24
+}
+
+define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
+entry:
+  %0 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @y, i8 %oldval, i8 %newval)
+  ret i8 %0
+
+; CHECK: AtomicCmpSwap8:
+; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
+; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK: ori $[[R5:[0-9]+]], $zero, 255
+; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK: andi $[[R8:[0-9]+]], $4, 255
+; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK: andi $[[R10:[0-9]+]], $5, 255
+; CHECK: sll $[[R11:[0-9]+]], $[[R10]], $[[R4]]
+
+; CHECK: $[[BB0:[A-Z_0-9]+]]:
+; CHECK: ll $[[R12:[0-9]+]], 0($[[R2]])
+; CHECK: and $[[R13:[0-9]+]], $[[R12]], $[[R6]]
+; CHECK: bne $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
+
+; CHECK: and $[[R14:[0-9]+]], $[[R12]], $[[R7]]
+; CHECK: or $[[R15:[0-9]+]], $[[R14]], $[[R11]]
+; CHECK: sc $[[R15]], 0($[[R2]])
+; CHECK: beq $[[R15]], $zero, $[[BB0]]
+
+; CHECK: $[[BB1]]:
+; CHECK: srl $[[R16:[0-9]+]], $[[R13]], $[[R4]]
+; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK: sra $2, $[[R17]], 24
+}
-- 
2.34.1