From: Vincent Lejeune Date: Tue, 4 Jun 2013 15:04:53 +0000 (+0000) Subject: R600: Swizzle texture/export instructions X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=98017a015bb862afce4f90d432eded4e28fe1d26;p=oota-llvm.git R600: Swizzle texture/export instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183229 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 00adca33e19..267f367c870 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1210,6 +1210,99 @@ EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { return VT.changeVectorElementTypeToInteger(); } +SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, + DenseMap &RemapSwizzle) { + assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); + assert(RemapSwizzle.empty()); + SDValue NewBldVec[4] = { + VectorEntry.getOperand(0), + VectorEntry.getOperand(1), + VectorEntry.getOperand(2), + VectorEntry.getOperand(3) + }; + + for (unsigned i = 0; i < 4; i++) { + if (ConstantFPSDNode *C = dyn_cast(NewBldVec[i])) { + if (C->isZero()) { + RemapSwizzle[i] = 4; // SEL_0 + NewBldVec[i] = DAG.getUNDEF(MVT::f32); + } else if (C->isExactlyValue(1.0)) { + RemapSwizzle[i] = 5; // SEL_1 + NewBldVec[i] = DAG.getUNDEF(MVT::f32); + } + } + + if (NewBldVec[i].getOpcode() == ISD::UNDEF) + continue; + for (unsigned j = 0; j < i; j++) { + if (NewBldVec[i] == NewBldVec[j]) { + NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType()); + RemapSwizzle[i] = j; + break; + } + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry), + VectorEntry.getValueType(), NewBldVec, 4); +} + +SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, + DenseMap &RemapSwizzle) { + assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); + assert(RemapSwizzle.empty()); + SDValue NewBldVec[4] = { + VectorEntry.getOperand(0), + VectorEntry.getOperand(1), + VectorEntry.getOperand(2), + VectorEntry.getOperand(3) + }; + bool isUnmovable[4] = { false, false, false, false }; + + for (unsigned i = 0; i < 4; i++) { + if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + unsigned Idx = dyn_cast(NewBldVec[i].getOperand(1)) + ->getZExtValue(); + if (!isUnmovable[Idx]) { + // Swap i and Idx + std::swap(NewBldVec[Idx], NewBldVec[i]); + RemapSwizzle[Idx] = i; + RemapSwizzle[i] = Idx; + } + isUnmovable[Idx] = true; + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry), + VectorEntry.getValueType(), NewBldVec, 4); +} + + +SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, +SDValue Swz[4], SelectionDAG &DAG) const { + assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR); + // Old -> New swizzle values + DenseMap SwizzleRemap; + + BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); + for (unsigned i = 0; i < 4; i++) { + unsigned Idx = dyn_cast(Swz[i])->getZExtValue(); + if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) + Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); + } + + SwizzleRemap.clear(); + BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); + for (unsigned i = 0; i < 4; i++) { + unsigned Idx = dyn_cast(Swz[i])->getZExtValue(); + if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) + Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); + } + + return BuildVector; +} + + //===----------------------------------------------------------------------===// // Custom DAG Optimizations //===----------------------------------------------------------------------===// @@ -1319,12 +1412,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; - SDValue NewBldVec[4] = { - DAG.getUNDEF(MVT::f32), - DAG.getUNDEF(MVT::f32), - DAG.getUNDEF(MVT::f32), - DAG.getUNDEF(MVT::f32) - }; + SDValue NewArgs[8] = { N->getOperand(0), // Chain SDValue(), @@ -1335,23 +1423,40 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(6), // SWZ_Z N->getOperand(7) // SWZ_W }; - for (unsigned i = 0; i < Arg.getNumOperands(); i++) { - if (ConstantFPSDNode *C = dyn_cast(Arg.getOperand(i))) { - if (C->isZero()) { - NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0 - } else if (C->isExactlyValue(1.0)) { - NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0 - } else { - NewBldVec[i] = Arg.getOperand(i); - } - } else { - NewBldVec[i] = Arg.getOperand(i); - } - } SDLoc DL(N); - NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4); + NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG); return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8); } + case AMDGPUISD::TEXTURE_FETCH: { + SDValue Arg = N->getOperand(1); + if (Arg.getOpcode() != ISD::BUILD_VECTOR) + break; + + SDValue NewArgs[19] = { + N->getOperand(0), + N->getOperand(1), + N->getOperand(2), + N->getOperand(3), + N->getOperand(4), + N->getOperand(5), + N->getOperand(6), + N->getOperand(7), + N->getOperand(8), + N->getOperand(9), + N->getOperand(10), + N->getOperand(11), + N->getOperand(12), + N->getOperand(13), + N->getOperand(14), + N->getOperand(15), + N->getOperand(16), + N->getOperand(17), + N->getOperand(18), + }; + NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG); + return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(), + NewArgs, 19); + } } return SDValue(); } diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 663aab4dfcd..2904396f4c6 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -51,6 +51,7 @@ private: void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; + SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const; /// \brief Lower ROTL opcode to BITALIGN SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll index 4ea82bb4c6a..aac014bde45 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll @@ -5,12 +5,12 @@ ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN -;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN -;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN -;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZZ}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZZ}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZZ}} RID:0 SID:0 CT:UUNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYYW}} RID:0 SID:0 CT:NNUN ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN -;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN -;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYYZ}} RID:0 SID:0 CT:NNUN ;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN ;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN