From: Tom Stellard Date: Thu, 9 Oct 2014 19:06:00 +0000 (+0000) Subject: R600/SI: Legalize CopyToReg during instruction selection X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=a8b2e6f4af1eae41e2e1f2f2df35959462a73321 R600/SI: Legalize CopyToReg during instruction selection The instruction emitter will crash if it encounters a CopyToReg node with a non-register operand like FrameIndex. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219428 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 32592129984..989fc938c08 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -487,9 +487,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { case AMDGPUISD::DIV_SCALE: { return SelectDIV_SCALE(N); } + case ISD::CopyToReg: { + const SITargetLowering& Lowering = + *static_cast<const SITargetLowering *>(getTargetLowering()); + Lowering.legalizeTargetIndependentNode(N, *CurDAG); + break; + } case ISD::ADDRSPACECAST: return SelectAddrSpaceCast(N); } + return SelectCode(N); } diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1a73d9bda58..49ac269998e 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1920,28 +1920,26 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, } } -/// \brief Legalize INSERT_SUBREG instructions with frame index operands. -/// LLVM assumes that all INSERT_SUBREG inputs are registers. -static void legalizeInsertSubreg(MachineSDNode *InsertSubreg, - SelectionDAG &DAG) { - - assert(InsertSubreg->getMachineOpcode() == AMDGPU::INSERT_SUBREG); +/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG) +/// with frame index operands. +/// LLVM assumes that inputs to these instructions are registers. 
+void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, + SelectionDAG &DAG) const { SmallVector<SDValue, 8> Ops; - for (unsigned i = 0; i < 2; ++i) { - if (!isa<FrameIndexSDNode>(InsertSubreg->getOperand(i))) { - Ops.push_back(InsertSubreg->getOperand(i)); + for (unsigned i = 0; i < Node->getNumOperands(); ++i) { + if (!isa<FrameIndexSDNode>(Node->getOperand(i))) { + Ops.push_back(Node->getOperand(i)); continue; } - SDLoc DL(InsertSubreg); + SDLoc DL(Node); Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, - InsertSubreg->getOperand(i).getValueType(), - InsertSubreg->getOperand(i)), 0)); + Node->getOperand(i).getValueType(), + Node->getOperand(i)), 0)); } - DAG.UpdateNodeOperands(InsertSubreg, Ops[0], Ops[1], - InsertSubreg->getOperand(2)); + DAG.UpdateNodeOperands(Node, Ops); } /// \brief Fold the instructions after selecting them. diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 9e50f0b2d69..f2202557c08 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -101,6 +101,7 @@ public: int32_t analyzeImmediate(const SDNode *N) const; SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const override; + void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; }; } // End namespace llvm diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll new file mode 100644 index 00000000000..f90ee782649 --- /dev/null +++ b/test/CodeGen/R600/copy-to-reg.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s + +; Test that CopyToReg instructions don't have non-register operands prior +; to being emitted. 
+ +; Make sure this doesn't crash +; CHECK-LABEL: {{^}}copy_to_reg_frameindex: +define void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +entry: + %alloca = alloca [16 x i32] + br label %loop + +loop: + %inc = phi i32 [0, %entry], [%inc.i, %loop] + %ptr = getelementptr [16 x i32]* %alloca, i32 0, i32 %inc + store i32 %inc, i32* %ptr + %inc.i = add i32 %inc, 1 + %cnd = icmp uge i32 %inc.i, 16 + br i1 %cnd, label %done, label %loop + +done: + %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 0 + %tmp1 = load i32* %tmp0 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +}