From: Vincent Lejeune Date: Tue, 5 Mar 2013 15:04:49 +0000 (+0000) Subject: R600: Turn BUILD_VECTOR into Reg_Sequence X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=cae6801b7df7c006d2956d5b6012b902cf6eb14d R600: Turn BUILD_VECTOR into Reg_Sequence Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176487 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index e77b9dca49f..0c7880d232b 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -162,6 +162,35 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; + case ISD::BUILD_VECTOR: { + const AMDGPUSubtarget &ST = TM.getSubtarget(); + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + break; + } + // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG + // that adds a 128 bits reg copy when going through TwoAddressInstructions + // pass. We want to avoid 128 bits copies as much as possible because they + // can't be bundled by our scheduler. + SDValue RegSeqArgs[9] = { + CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32) + }; + bool IsRegSeq = true; + for (unsigned i = 0; i < N->getNumOperands(); i++) { + if (dyn_cast(N->getOperand(i))) { + IsRegSeq = false; + break; + } + RegSeqArgs[2 * i + 1] = N->getOperand(i); + } + if (!IsRegSeq) + break; + return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), + RegSeqArgs, 2 * N->getNumOperands() + 1); + } case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget(); diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 00f8b106c73..6c695a08d9a 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -37,7 +37,6 @@ add_llvm_target(R600CodeGen R600ExpandSpecialInstrs.cpp R600InstrInfo.cpp R600ISelLowering.cpp - R600LowerConstCopy.cpp R600MachineFunctionInfo.cpp R600RegisterInfo.cpp SIAnnotateControlFlow.cpp diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll index 459fd119d20..79e677f541f 100644 --- a/test/CodeGen/R600/fdiv.v4f32.ll +++ b/test/CodeGen/R600/fdiv.v4f32.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1