From 13f4476c55c78e950227e1db39375df8221c9ec8 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 11 Aug 2014 22:18:17 +0000
Subject: [PATCH] R600/SI: Add a ComplexPattern for selecting MUBUF _OFFSET
 variant

This saves us from having to copy a 64-bit 0 value into VGPRs for
BUFFER_* instructions, which only have a 12-bit immediate offset.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215399 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelDAGToDAG.cpp     | 147 ++++++++----
 lib/Target/R600/SIInstrInfo.cpp            | 259 +++++++++++++--------
 lib/Target/R600/SIInstrInfo.h              |   1 +
 lib/Target/R600/SIInstrInfo.td             |  37 ++-
 test/CodeGen/R600/ctpop.ll                 |   4 +-
 test/CodeGen/R600/extload.ll               |  12 +-
 test/CodeGen/R600/mubuf.ll                 |  14 +-
 test/CodeGen/R600/private-memory.ll        |   2 +-
 test/CodeGen/R600/schedule-global-loads.ll |  19 +-
 test/CodeGen/R600/sext-in-reg.ll           |   6 +-
 test/CodeGen/R600/zero_extend.ll           |   2 +-
 11 files changed, 326 insertions(+), 177 deletions(-)
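Editorial note, not part of the patch: the selection logic added below boils down to one decision — if the address is base + constant and the constant fits the 12-bit MUBUF immediate field, fold the base into the SGPR resource descriptor and use the _OFFSET variant; only addresses with a variable VGPR component need the ADDR64 variant. A minimal standalone C++ sketch of that decision (the 4096 bound mirrors isLegalMUBUFImmOffset below; everything else is illustrative):

#include <cstdint>
#include <cstdio>

// Mirrors isLegalMUBUFImmOffset() in AMDGPUISelDAGToDAG.cpp: isUInt<12>(Imm).
static bool isLegalMUBUFImmOffset(uint64_t Imm) { return Imm < 4096; }

static const char *selectVariant(bool HasVGPRComponent, uint64_t ConstOff) {
  if (!HasVGPRComponent && isLegalMUBUFImmOffset(ConstOff))
    return "_OFFSET";  // base folded into the SGPR resource descriptor
  return "_ADDR64";    // pointer must live in a 64-bit VGPR pair
}

int main() {
  std::printf("%s\n", selectVariant(false, 4));    // _OFFSET
  std::printf("%s\n", selectVariant(false, 4096)); // _ADDR64: needs 13 bits
  std::printf("%s\n", selectVariant(true, 4));     // _ADDR64: variable part
}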
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index d4c0987ad94..a624b701bc0 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -88,13 +88,16 @@ private:
                        SDValue& Offset);
   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
-                         SDValue &ImmOffset) const;
+  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
+                   SDValue &TFE) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                         SDValue &Offset) const;
   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                           SDValue &SOffset, SDValue &ImmOffset) const;
-  bool SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &SOffset, SDValue &Offset, SDValue &Offen,
-                         SDValue &Idxen, SDValue &GLC, SDValue &SLC,
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
+                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                          SDValue &TFE) const;
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -750,11 +753,23 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
   return isUInt<12>(Imm->getZExtValue());
 }
 
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
-                                           SDValue &Offset,
-                                           SDValue &ImmOffset) const {
+void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+                                     SDValue &VAddr, SDValue &SOffset,
+                                     SDValue &Offset, SDValue &Offen,
+                                     SDValue &Idxen, SDValue &Addr64,
+                                     SDValue &GLC, SDValue &SLC,
+                                     SDValue &TFE) const {
   SDLoc DL(Addr);
 
+  GLC = CurDAG->getTargetConstant(0, MVT::i1);
+  SLC = CurDAG->getTargetConstant(0, MVT::i1);
+  TFE = CurDAG->getTargetConstant(0, MVT::i1);
+
+  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
+  Offen = CurDAG->getTargetConstant(0, MVT::i1);
+  Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
+  SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
@@ -763,59 +778,88 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
 
     if (isLegalMUBUFImmOffset(C1)) {
 
       if (N0.getOpcode() == ISD::ADD) {
-        // (add (add N2, N3), C1)
+        // (add (add N2, N3), C1) -> addr64
         SDValue N2 = N0.getOperand(0);
         SDValue N3 = N0.getOperand(1);
-        Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
-        Offset = N3;
-        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
-        return true;
+        Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+        Ptr = N2;
+        VAddr = N3;
+        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+        return;
       }
 
-      // (add N0, C1)
-      Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));;
-      Offset = N0;
-      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
-      return true;
+      // (add N0, C1) -> offset
+      VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+      Ptr = N0;
+      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+      return;
     }
   }
 
   if (Addr.getOpcode() == ISD::ADD) {
-    // (add N0, N1)
+    // (add N0, N1) -> addr64
     SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
-    Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
-    Offset = N1;
-    ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
-    return true;
+    Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+    Ptr = N0;
+    VAddr = N1;
+    Offset = CurDAG->getTargetConstant(0, MVT::i16);
+    return;
   }
 
-  // default case
-  Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
-  Offset = Addr;
-  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
-  return true;
+  // default case -> offset
+  VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+  Ptr = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i16);
 }
 
-/// \brief Return a resource descriptor with the 'Add TID' bit enabled
-/// The TID (Thread ID) is multipled by the stride value (bits [61:48]
-/// of the resource descriptor) to create an offset, which is added to the
-/// resource ponter.
-static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &VAddr,
+                                           SDValue &Offset) const {
+  SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
 
-  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff;
+  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+              GLC, SLC, TFE);
+
+  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
+  if (C->getSExtValue()) {
+    SDLoc DL(Addr);
+    SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
+    return true;
+  }
+  return false;
+}
+
+static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
+                         uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
 
   SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
   SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
 
+  if (RsrcDword1)
+    PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
+                                        DAG->getConstant(RsrcDword1, MVT::i32)), 0);
+
   SDValue DataLo = DAG->getTargetConstant(
-      Rsrc & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
-  SDValue DataHi = DAG->getTargetConstant(Rsrc >> 32, MVT::i32);
+      RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
+  SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);
 
   const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
   return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
                                      MVT::v4i32, Ops), 0);
 }
 
+/// \brief Return a resource descriptor with the 'Add TID' bit enabled
+/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
+/// of the resource descriptor) to create an offset, which is added to the
+/// resource pointer.
+static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
+
+  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+                  0xffffffff; // Size
+
+  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
+}
+
 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                             SDValue &VAddr, SDValue &SOffset,
                                             SDValue &ImmOffset) const {
@@ -870,20 +914,25 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
   return true;
 }
 
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &VAddr, SDValue &SOffset,
-                                           SDValue &Offset, SDValue &Offen,
-                                           SDValue &Idxen, SDValue &GLC,
-                                           SDValue &SLC, SDValue &TFE) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &SOffset, SDValue &Offset,
+                                           SDValue &GLC, SDValue &SLC,
+                                           SDValue &TFE) const {
+  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
 
-  GLC = CurDAG->getTargetConstant(0, MVT::i1);
-  SLC = CurDAG->getTargetConstant(0, MVT::i1);
-  TFE = CurDAG->getTargetConstant(0, MVT::i1);
+  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+              GLC, SLC, TFE);
 
-  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
-  Offen = CurDAG->getTargetConstant(1, MVT::i1);
-
-  return SelectMUBUFScratch(Addr, SRsrc, VAddr, SOffset, Offset);
+  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
+      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
+      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
+    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
+                    APInt::getAllOnesValue(32).getZExtValue(); // Size
+    SDLoc DL(Addr);
+    SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
+    return true;
+  }
+  return false;
 }
 
 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
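Editorial note, not part of the patch: buildRSRC above assembles a v4i32 SI buffer resource descriptor from a 64-bit pointer plus two constant fields. A standalone C++ illustration of the same dword layout (the format constant mirrors RSRC_DATA_FORMAT in SIInstrInfo.h; the pointer value is made up):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000ULL;
  const uint64_t Ptr = 0x0000123456789000ULL;           // hypothetical base
  const uint32_t RsrcDword1 = 0;                        // extra PtrHi bits
  const uint64_t RsrcDword2And3 = RSRC_DATA_FORMAT | 0xffffffffULL;

  uint32_t Desc[4];
  Desc[0] = uint32_t(Ptr);                     // PtrLo
  Desc[1] = uint32_t(Ptr >> 32) | RsrcDword1;  // PtrHi (S_OR_B32 if non-zero)
  Desc[2] = uint32_t(RsrcDword2And3);          // DataLo: buffer size
  Desc[3] = uint32_t(RsrcDword2And3 >> 32);    // DataHi: data format

  for (int I = 0; I < 4; ++I)
    std::printf("dword%d = 0x%08x\n", I, unsigned(Desc[I]));
}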
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 28a60aaef1e..c9e121ffd7e 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -45,6 +45,33 @@ static SDValue findChainOperand(SDNode *Load) {
   return LastOp;
 }
 
+/// \brief Returns true if both nodes have the same value for the given
+///        operand \p OpName, or if both nodes do not have this operand.
+static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
+  unsigned Opc0 = N0->getMachineOpcode();
+  unsigned Opc1 = N1->getMachineOpcode();
+
+  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
+  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
+
+  if (Op0Idx == -1 && Op1Idx == -1)
+    return true;
+
+  if ((Op0Idx == -1 && Op1Idx != -1) ||
+      (Op1Idx == -1 && Op0Idx != -1))
+    return false;
+
+  // getNamedOperandIdx returns the index for the MachineInstr's operands,
+  // which includes the result as the first operand. We are indexing into the
+  // MachineSDNode's operands, so we need to skip the result operand to get
+  // the real index.
+  --Op0Idx;
+  --Op1Idx;
+
+  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
+}
+
 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                           int64_t &Offset0,
                                           int64_t &Offset1) const {
@@ -98,32 +125,35 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
 
   // MUBUF and MTBUF can access the same addresses.
   if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
-    // Skip if an SGPR offset is applied. I don't think we ever emit any of
-    // variants that use this currently.
-    int SoffsetIdx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::soffset);
-    if (SoffsetIdx != -1)
+
+    // MUBUF and MTBUF have vaddr at different indices.
+    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
+        findChainOperand(Load0) != findChainOperand(Load1) ||
+        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
+        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
       return false;
 
-    // getNamedOperandIdx returns the index for the MachineInstr's operands,
-    // which includes the result as the first operand. We are indexing into the
-    // MachineSDNode's operands, so we need to skip the result operand to get
-    // the real index.
-    --SoffsetIdx;
+    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
 
-    // Check chain.
-    if (findChainOperand(Load0) != findChainOperand(Load1))
+    if (OffIdx0 == -1 || OffIdx1 == -1)
       return false;
 
-    // MUBUF and MTBUF have vaddr at different indices.
-    int VaddrIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::vaddr) - 1;
-    int VaddrIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::vaddr) - 1;
-    if (Load0->getOperand(VaddrIdx0) != Load1->getOperand(VaddrIdx1))
+    // getNamedOperandIdx returns the index for MachineInstrs. Since they
+    // include the output in the operand list, but SDNodes don't, we need to
+    // subtract the index by one.
+    --OffIdx0;
+    --OffIdx1;
+
+    SDValue Off0 = Load0->getOperand(OffIdx0);
+    SDValue Off1 = Load1->getOperand(OffIdx1);
+
+    // The offset might be a FrameIndexSDNode.
+    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;
 
-    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset) - 1;
-    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset) - 1;
-    Offset0 = cast<ConstantSDNode>(Load0->getOperand(OffIdx0))->getZExtValue();
-    Offset1 = cast<ConstantSDNode>(Load1->getOperand(OffIdx1))->getZExtValue();
+    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
+    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
 
     return true;
   }
@@ -1276,105 +1306,128 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   // Legalize MUBUF* instructions
   // FIXME: If we start using the non-addr64 instructions for compute, we
   // may need to legalize them here.
+  int SRsrcIdx =
+      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
+  if (SRsrcIdx != -1) {
+    // We have an MUBUF instruction
+    MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
+    unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
+    if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
+                             RI.getRegClass(SRsrcRC))) {
+      // The operands are legal.
+      // FIXME: We may need to legalize operands besides srsrc.
+      return;
+    }
 
-  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                            AMDGPU::OpName::srsrc);
-  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                            AMDGPU::OpName::vaddr);
-  if (SRsrcIdx != -1 && VAddrIdx != -1) {
-    const TargetRegisterClass *VAddrRC =
-        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
-
-    if(VAddrRC->getSize() == 8 &&
-       MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
-      // We have a MUBUF instruction that uses a 64-bit vaddr register and
-      // srsrc has the incorrect register class. In order to fix this, we
-      // need to extract the pointer from the resource descriptor (srsrc),
-      // add it to the value of vadd, then store the result in the vaddr
-      // operand. Then, we need to set the pointer field of the resource
-      // descriptor to zero.
+    MachineBasicBlock &MBB = *MI->getParent();
+    // Extract the ptr from the resource descriptor.
 
-      MachineBasicBlock &MBB = *MI->getParent();
-      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
-      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
-      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
-      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
-      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
-
-      // SRsrcPtrLo = srsrc:sub0
-      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
-          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
-      // SRsrcPtrHi = srsrc:sub1
-      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
-          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
-      // VAddrLo = vaddr:sub0
-      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
-          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
-      // VAddrHi = vaddr:sub1
-      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
-          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
-      // NewVaddrLo = SRsrcPtrLo + VAddrLo
+    // SRsrcPtrLo = srsrc:sub0
+    unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
+        &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+    // SRsrcPtrHi = srsrc:sub1
+    unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
+        &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+    // Create an empty resource descriptor
+    unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+    // Zero64 = 0
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+            Zero64)
+            .addImm(0);
+
+    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+            SRsrcFormatLo)
+            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+            SRsrcFormatHi)
+            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+    // NewSRsrc = {Zero64, SRsrcFormat}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+            NewSRsrc)
+            .addReg(Zero64)
+            .addImm(AMDGPU::sub0_sub1)
+            .addReg(SRsrcFormatLo)
+            .addImm(AMDGPU::sub2)
+            .addReg(SRsrcFormatHi)
+            .addImm(AMDGPU::sub3);
+
+    MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+    unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+    unsigned NewVAddrLo;
+    unsigned NewVAddrHi;
+    if (VAddr) {
+      // This is already an ADDR64 instruction so we need to add the pointer
+      // extracted from the resource descriptor to the current value of VAddr.
+      NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+      // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
       BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
               NewVAddrLo)
               .addReg(SRsrcPtrLo)
-              .addReg(VAddrLo)
-              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+              .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+              .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
 
-      // NewVaddrHi = SRsrcPtrHi + VAddrHi
+      // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
       BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
               NewVAddrHi)
              .addReg(SRsrcPtrHi)
-              .addReg(VAddrHi)
+              .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);
 
-      // NewVaddr = {NewVaddrHi, NewVaddrLo}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-              NewVAddr)
-              .addReg(NewVAddrLo)
-              .addImm(AMDGPU::sub0)
-              .addReg(NewVAddrHi)
-              .addImm(AMDGPU::sub1);
-
-      // Zero64 = 0
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
-              Zero64)
-              .addImm(0);
+    } else {
+      // This instruction is the _OFFSET variant, so we need to convert it to
+      // ADDR64.
+      MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
+      MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
+      MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
+      assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
+             "with non-zero soffset is not implemented");
+
+      // Create the new instruction.
+      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
+      MachineInstr *Addr64 =
+          BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+              .addOperand(*VData)
+              .addOperand(*SRsrc)
+              .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+                                          // This will be replaced later
+                                          // with the new value of vaddr.
+              .addOperand(*Offset);
+
+      MI->removeFromParent();
+      MI = Addr64;
+
+      NewVAddrLo = SRsrcPtrLo;
+      NewVAddrHi = SRsrcPtrHi;
+      VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+      SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
+    }
 
-      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
-              SRsrcFormatLo)
-              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+    // NewVaddr = {NewVaddrHi, NewVaddrLo}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+            NewVAddr)
+            .addReg(NewVAddrLo)
+            .addImm(AMDGPU::sub0)
+            .addReg(NewVAddrHi)
+            .addImm(AMDGPU::sub1);
 
-      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
-              SRsrcFormatHi)
-              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
-
-      // NewSRsrc = {Zero64, SRsrcFormat}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-              NewSRsrc)
-              .addReg(Zero64)
-              .addImm(AMDGPU::sub0_sub1)
-              .addReg(SRsrcFormatLo)
-              .addImm(AMDGPU::sub2)
-              .addReg(SRsrcFormatHi)
-              .addImm(AMDGPU::sub3);
-
-      // Update the instruction to use NewVaddr
-      MI->getOperand(VAddrIdx).setReg(NewVAddr);
-      // Update the instruction to use NewSRsrc
-      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
-    }
+    // Update the instruction to use NewVaddr
+    VAddr->setReg(NewVAddr);
+    // Update the instruction to use NewSRsrc
+    SRsrc->setReg(NewSRsrc);
   }
 }
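Editorial note, not part of the patch: the legalizeOperands change above performs one operand shuffle — the pointer is moved out of the resource descriptor into vaddr, and an _OFFSET instruction (which has no vaddr operand) is first rebuilt as its _ADDR64 twin. A schematic standalone C++ model of just that shuffle, with simplified stand-in fields (not LLVM code):

#include <cstdint>
#include <cstdio>

struct MUBUFInst {
  bool HasVAddr;    // _ADDR64 variants carry a vaddr operand
  uint64_t RsrcPtr; // pointer field of the resource descriptor
  uint64_t VAddr;   // 64-bit VGPR address (meaningful when HasVAddr)
};

static void legalize(MUBUFInst &MI) {
  uint64_t Ptr = MI.RsrcPtr; // extracted via sub0/sub1 copies in the real code
  if (MI.HasVAddr) {
    MI.VAddr += Ptr;         // V_ADD_I32 / V_ADDC_U32 pair
  } else {
    MI.HasVAddr = true;      // switch to the _ADDR64 twin opcode
    MI.VAddr = Ptr;          // the pointer becomes the new vaddr
  }
  MI.RsrcPtr = 0;            // descriptor gets a zeroed pointer field
}

int main() {
  MUBUFInst OffsetForm = {false, 0x1000, 0};
  legalize(OffsetForm);
  std::printf("vaddr=0x%llx rsrc.ptr=0x%llx\n",
              (unsigned long long)OffsetForm.VAddr,
              (unsigned long long)OffsetForm.RsrcPtr);
}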
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index f1060637a10..7085e986835 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -209,6 +209,7 @@ namespace AMDGPU {
   int getCommuteRev(uint16_t Opcode);
   int getCommuteOrig(uint16_t Opcode);
   int getMCOpcode(uint16_t Opcode, unsigned Gen);
+  int getAddr64Inst(uint16_t Opcode);
 
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_TID_ENABLE = 1LL << 55;
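Editorial note, not part of the patch: the getAddr64Inst declaration added here is backed by a TableGen InstrMapping (see SIInstrInfo.td below), which generates a lookup table keyed by the _OFFSET opcode. A hand-rolled C++ equivalent, with made-up opcode values purely for illustration:

#include <cstdint>
#include <cstdio>
#include <map>

enum Opcode : uint16_t {        // hypothetical numbering
  BUFFER_LOAD_DWORD_OFFSET,
  BUFFER_LOAD_DWORD_ADDR64,
  BUFFER_STORE_DWORD_OFFSET,
  BUFFER_STORE_DWORD_ADDR64,
};

static int getAddr64Inst(uint16_t Opc) {
  static const std::map<uint16_t, uint16_t> Table = {
    {BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_ADDR64},
    {BUFFER_STORE_DWORD_OFFSET, BUFFER_STORE_DWORD_ADDR64},
  };
  auto It = Table.find(Opc);
  return It == Table.end() ? -1 : It->second; // -1: no ADDR64 twin exists
}

int main() {
  std::printf("%d\n", getAddr64Inst(BUFFER_LOAD_DWORD_OFFSET));  // the twin
  std::printf("%d\n", getAddr64Inst(BUFFER_LOAD_DWORD_ADDR64)); // -1
}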
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 9214ecc1e25..3c5a3a2733b 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -194,6 +194,7 @@ def tfe : Operand <i1> {
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
 def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
 
 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
@@ -901,6 +902,11 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
   let mayLoad = 1;
 }
 
+class MUBUFAddr64Table <bit is_addr64> {
+
+  bit IsAddr64 = is_addr64;
+}
+
 class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
   op,
   (outs),
@@ -927,7 +933,11 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
                    (ins SReg_128:$srsrc, mbuf_offset:$offset,
                         SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
-                   asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                   asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+                   [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
+                                             i32:$soffset, i16:$offset,
+                                             i1:$glc, i1:$slc, i1:$tfe)))]>,
+                   MUBUFAddr64Table<0>;
   }
 
   let offen = 1, idxen = 0 in {
@@ -959,7 +969,7 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
                    (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
                    asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
                    [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
-                                             i64:$vaddr, i16:$offset)))]>;
+                                             i64:$vaddr, i16:$offset)))]>, MUBUFAddr64Table<1>;
     }
   }
 }
@@ -979,6 +989,18 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
     []
   >;
 
+  let offen = 0, idxen = 0, vaddr = 0 in {
+    def _OFFSET : MUBUF <
+      op,
+      (outs),
+      (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+           SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+      name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+      [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+                                         i16:$offset, i1:$glc, i1:$slc,
+                                         i1:$tfe))]
+    >, MUBUFAddr64Table<0>;
+  } // offen = 0, idxen = 0, vaddr = 0
+
   let offen = 1, idxen = 0 in {
     def _OFFEN : MUBUF <
       op,
       (outs),
@@ -997,7 +1019,8 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
     (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
     name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
     [(st store_vt:$vdata,
-     (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]> {
+     (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>, MUBUFAddr64Table<1>
+  {
 
     let mayLoad = 0;
     let mayStore = 1;
@@ -1216,4 +1239,12 @@ def getMCOpcode : InstrMapping {
   let ValueCols = [[!cast<string>(SISubtarget.SI)]];
 }
 
+def getAddr64Inst : InstrMapping {
+  let FilterClass = "MUBUFAddr64Table";
+  let RowFields = ["NAME"];
+  let ColFields = ["IsAddr64"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
 include "SIInstructions.td"
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index f26d30d7b80..c7c406a57e6 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -236,8 +236,8 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
 }
 
 ; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 {{addr64$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} offset:0x10
+; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
 ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 ; SI: BUFFER_STORE_DWORD [[RESULT]],
 ; SI: S_ENDPGM
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index dc056e0ecdd..9725bbfb709 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -87,8 +87,8 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
 }
 
 ; FUNC-LABEL: @zextload_global_i8_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
+; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
 ; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 ; SI: BUFFER_STORE_DWORDX2
 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
@@ -99,8 +99,8 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
 }
 
 ; FUNC-LABEL: @zextload_global_i16_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
+; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
 ; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 ; SI: BUFFER_STORE_DWORDX2
 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
@@ -111,8 +111,8 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
 }
 
 ; FUNC-LABEL: @zextload_global_i32_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
 ; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 ; SI: BUFFER_STORE_DWORDX2
 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index 27faacf4500..bbfd329fc08 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -6,7 +6,7 @@
 
 ; MUBUF load with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: @mubuf_load0
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x30,0xe0
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 1
@@ -17,7 +17,7 @@ entry:
 
 ; MUBUF load with the largest possible immediate offset
 ; CHECK-LABEL: @mubuf_load1
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f,0x20,0xe0
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x20,0xe0
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8 addrspace(1)* %in, i64 4095
@@ -28,7 +28,7 @@ entry:
 
 ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: @mubuf_load2
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 1024
@@ -40,7 +40,7 @@ entry:
 ; MUBUF load with a 12-bit immediate offset and a register offset
 ; CHECK-LABEL: @mubuf_load3
 ; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
 define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@@ -56,7 +56,7 @@ entry:
 
 ; MUBUF store with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: @mubuf_store0
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x70,0xe0
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x70,0xe0
 define void @mubuf_store0(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 1
@@ -66,7 +66,7 @@ entry:
 
 ; MUBUF store with the largest possible immediate offset
 ; CHECK-LABEL: @mubuf_store1
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f,0x60,0xe0
+; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x60,0xe0
 define void @mubuf_store1(i8 addrspace(1)* %out) {
 entry:
@@ -77,7 +77,7 @@ entry:
 
 ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: @mubuf_store2
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
 define void @mubuf_store2(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 1024
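Editorial note, not part of the patch: the "; encoding:" bytes in the checks above change because the addr64 bit is cleared in the _OFFSET form. Assuming the usual SI MUBUF layout for the first instruction dword (offset in bits 11:0, offen bit 12, idxen bit 13, glc bit 14, addr64 bit 15 — consistent with the byte patterns in this test), a standalone C++ check of the byte change for offset 0xfff:

#include <cstdint>
#include <cstdio>

static uint16_t encodeLow16(uint16_t Offset12, bool Addr64) {
  return (Offset12 & 0xfff) | (uint16_t(Addr64) << 15);
}

int main() {
  uint16_t Old = encodeLow16(0xfff, true);  // ADDR64 form
  uint16_t New = encodeLow16(0xfff, false); // _OFFSET form
  // Prints "[0xff,0x8f] -> [0xff,0x0f]", matching the updated CHECK lines.
  std::printf("[0x%02x,0x%02x] -> [0x%02x,0x%02x]\n",
              unsigned(Old & 0xff), unsigned(Old >> 8),
              unsigned(New & 0xff), unsigned(New >> 8));
}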
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index b0f9c98e9d5..505ef6c51df 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -118,7 +118,7 @@ for.end:
 
 ; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
 ; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0
-; SI_PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
+; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
 define void @short_array(i32 addrspace(1)* %out, i32 %index) {
 entry:
   %0 = alloca [2 x i16]
diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/R600/schedule-global-loads.ll
index f73d3030564..fcff65f02ef 100644
--- a/test/CodeGen/R600/schedule-global-loads.ll
+++ b/test/CodeGen/R600/schedule-global-loads.ll
@@ -9,8 +9,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
 ; ordering the loads so that the lower address loads come first.
 
 ; FUNC-LABEL: @cluster_global_arg_loads
-; SI: BUFFER_LOAD_DWORD [[REG0:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI: BUFFER_LOAD_DWORD [[REG1:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI-DAG: BUFFER_LOAD_DWORD [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: BUFFER_LOAD_DWORD [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x4
 ; SI: BUFFER_STORE_DWORD [[REG0]]
 ; SI: BUFFER_STORE_DWORD [[REG1]]
 define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
@@ -22,5 +22,20 @@ define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)*
   ret void
 }
 
+; Test for a crash in SIInstrInfo::areLoadsFromSameBasePtr() when checking
+; an MUBUF load which does not have a vaddr operand.
+; FUNC-LABEL: @same_base_ptr_crash
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
+entry:
+  %out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+  %tmp0 = load i32 addrspace(1)* %out
+  %tmp1 = load i32 addrspace(1)* %out1
+  %tmp2 = add i32 %tmp0, %tmp1
+  store i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
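Editorial note, not part of the patch: the crash covered by same_base_ptr_crash above came from doing index arithmetic on getNamedOperandIdx's result when the operand is absent (index -1). nodesHaveSameOperandValue in SIInstrInfo.cpp instead treats "both absent" as equal and "only one absent" as different. A standalone C++ mirror of that guard (not LLVM code):

#include <cstdio>

// Pretend lookup: returns -1 when the instruction lacks the named operand.
static int getNamedOperandIdx(bool HasOperand) { return HasOperand ? 2 : -1; }

static bool sameOperandValue(bool Has0, int Val0, bool Has1, int Val1) {
  int Idx0 = getNamedOperandIdx(Has0);
  int Idx1 = getNamedOperandIdx(Has1);
  if (Idx0 == -1 && Idx1 == -1)
    return true;           // neither has the operand: trivially equal
  if (Idx0 == -1 || Idx1 == -1)
    return false;          // only one has it: not comparable
  return Val0 == Val1;     // both present: compare the values
}

int main() {
  std::printf("%d\n", sameOperandValue(false, 0, false, 0)); // 1
  std::printf("%d\n", sameOperandValue(false, 0, true, 7));  // 0
  std::printf("%d\n", sameOperandValue(true, 7, true, 7));   // 1
}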
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index 1b02e4bf801..14f1cdf3b95 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -75,9 +75,9 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
 }
 
 ; FUNC-LABEL: @sext_in_reg_i1_to_i64
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: BUFFER_STORE_DWORDX2
 define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %c = add i64 %a, %b
@@ -88,9 +88,9 @@ define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: @sext_in_reg_i8_to_i64
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: BUFFER_STORE_DWORDX2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
@@ -112,9 +112,9 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: @sext_in_reg_i16_to_i64
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: BUFFER_STORE_DWORDX2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index 8585d4ab191..1a0fd731d7d 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -6,7 +6,7 @@
 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
 
 ; SI-CHECK: @test
-; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0
+; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0{{$}}
 ; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
 ; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
 define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
-- 
2.34.1