From cc18827f595d87e054cc312f3ccc805645dca926 Mon Sep 17 00:00:00 2001
From: Scott Michel
Date: Thu, 4 Dec 2008 21:01:44 +0000
Subject: [PATCH] CellSPU: Fix bug 3055

- Add v4f32, v2f64 to LowerVECTOR_SHUFFLE
- Look for vector rotate in shuffle elements, generate a vector rotate
  instead of a full-blown shuffle when opportunity presents itself.
- Generate larger test harness and fix a few interesting but obscure bugs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60552 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../CellSPU/AsmPrinter/SPUAsmPrinter.cpp |  1 -
 lib/Target/CellSPU/SPUISelLowering.cpp   | 77 +++++++++++++------
 lib/Target/CellSPU/SPUInstrInfo.td       |  4 +
 3 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 31b7e6075b2..94d8cd5aa3f 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -221,7 +221,6 @@ namespace {
     void
     printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo) {
       printOp(MI->getOperand(OpNo));
-      O << "-.";
     }
 
     void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 17d88a937ad..2b35e76595e 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -548,7 +548,6 @@ AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
     prefSlotOffs = (int) (alignOffs & 0xf);
     prefSlotOffs -= vtm->prefslot_byte;
-    basePtr = DAG.getRegister(SPU::R1, VT);
   } else {
     alignOffs = 0;
     prefSlotOffs = -vtm->prefslot_byte;
@@ -1127,6 +1126,8 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
         ArgOffset += StackSlotSize;
       }
       break;
+    case MVT::v2i64:
+    case MVT::v2f64:
     case MVT::v4f32:
     case MVT::v4i32:
     case MVT::v8i16:
@@ -1255,6 +1256,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
     NumResults = 1;
     break;
   case MVT::v2f64:
+  case MVT::v2i64:
   case MVT::v4f32:
   case MVT::v4i32:
   case MVT::v8i16:
@@ -1747,38 +1749,64 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   // If we have a single element being moved from V1 to V2, this can be handled
   // using the C*[DX] compute mask instructions, but the vector elements have
   // to be monotonically increasing with one exception element.
-  MVT EltVT = V1.getValueType().getVectorElementType();
+  MVT VecVT = V1.getValueType();
+  MVT EltVT = VecVT.getVectorElementType();
   unsigned EltsFromV2 = 0;
   unsigned V2Elt = 0;
   unsigned V2EltIdx0 = 0;
   unsigned CurrElt = 0;
+  unsigned MaxElts = VecVT.getVectorNumElements();
+  unsigned PrevElt = 0;
+  unsigned V0Elt = 0;
   bool monotonic = true;
-  if (EltVT == MVT::i8)
+  bool rotate = true;
+
+  if (EltVT == MVT::i8) {
     V2EltIdx0 = 16;
-  else if (EltVT == MVT::i16)
+  } else if (EltVT == MVT::i16) {
     V2EltIdx0 = 8;
-  else if (EltVT == MVT::i32)
+  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
     V2EltIdx0 = 4;
-  else
+  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+    V2EltIdx0 = 2;
+  } else
     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
 
-  for (unsigned i = 0, e = PermMask.getNumOperands();
-       EltsFromV2 <= 1 && monotonic && i != e;
-       ++i) {
-    unsigned SrcElt;
-    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
-      SrcElt = 0;
-    else
-      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
+  for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
+    if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
+      unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
 
-    if (SrcElt >= V2EltIdx0) {
-      ++EltsFromV2;
-      V2Elt = (V2EltIdx0 - SrcElt) << 2;
-    } else if (CurrElt != SrcElt) {
-      monotonic = false;
-    }
+      if (monotonic) {
+        if (SrcElt >= V2EltIdx0) {
+          if (1 >= (++EltsFromV2)) {
+            V2Elt = (V2EltIdx0 - SrcElt) << 2;
+          }
+        } else if (CurrElt != SrcElt) {
+          monotonic = false;
+        }
 
-    ++CurrElt;
+        ++CurrElt;
+      }
+
+      if (rotate) {
+        if (PrevElt > 0 && SrcElt < MaxElts) {
+          if ((PrevElt == SrcElt - 1)
+              || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+            PrevElt = SrcElt;
+            if (SrcElt == 0)
+              V0Elt = i;
+          } else {
+            rotate = false;
+          }
+        } else if (PrevElt == 0) {
+          // First time through, need to keep track of previous element
+          PrevElt = SrcElt;
+        } else {
+          // This isn't a rotation, takes elements from vector 2
+          rotate = false;
+        }
+      }
+    }
   }
 
   if (EltsFromV2 == 1 && monotonic) {
@@ -1797,6 +1825,11 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
                       DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
     // Use shuffle mask in SHUFB synthetic instruction:
     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
+  } else if (rotate) {
+    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+    return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
+                       V1, DAG.getConstant(rotamt, MVT::i16));
   } else {
     // Convert the SHUFFLE_VECTOR mask's input element units to the
     // actual bytes.
@@ -2127,7 +2160,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
 
     SDValue ShufMask[4];
     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
-      unsigned bidx = i / 4;
+      unsigned bidx = i * 4;
       unsigned int bits = ((ShufBytes[bidx] << 24) |
                            (ShufBytes[bidx+1] << 16) |
                            (ShufBytes[bidx+2] << 8) |
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 03f79d36ef4..678f8e9dac2 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -2124,7 +2124,9 @@ multiclass RotateQuadLeftByBytes
   def v16i8: ROTQBYVecInst<v16i8>;
   def v8i16: ROTQBYVecInst<v8i16>;
   def v4i32: ROTQBYVecInst<v4i32>;
+  def v4f32: ROTQBYVecInst<v4f32>;
   def v2i64: ROTQBYVecInst<v2i64>;
+  def v2f64: ROTQBYVecInst<v2f64>;
 }
 
 defm ROTQBY: RotateQuadLeftByBytes;
@@ -2147,7 +2149,9 @@ multiclass RotateQuadByBytesImm
   def v16i8: ROTQBYIVecInst<v16i8>;
   def v8i16: ROTQBYIVecInst<v8i16>;
   def v4i32: ROTQBYIVecInst<v4i32>;
+  def v4f32: ROTQBYIVecInst<v4f32>;
   def v2i64: ROTQBYIVecInst<v2i64>;
+  def v2f64: ROTQBYIVecInst<v2f64>;
 }
 
 defm ROTQBYI: RotateQuadByBytesImm;
-- 
2.34.1
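
For readers following the LowerVECTOR_SHUFFLE change above, here is a minimal
standalone sketch of the rotate-detection idea: a shuffle mask drawn from a
single source vector encodes a left rotation when every defined entry follows
its predecessor by one, wrapping from the last element back to 0, and the byte
rotate amount then falls out of where element 0 lands. The helper name
isRotateMask and the plain-array mask representation are assumptions made for
this sketch, not part of the patch; the committed code walks the PermMask
SDNode operands and tracks the same state with PrevElt/V0Elt/rotate (using a
PrevElt == 0 "first time through" convention that this sketch replaces with a
-1 sentinel).

    #include <cassert>
    #include <cstdio>

    // Sketch of the rotate test in LowerVECTOR_SHUFFLE (assumed helper, not
    // an LLVM API). `mask` has `n` entries selecting from a single n-element
    // source vector; UNDEF entries are modeled as -1. On success, `rotamt`
    // receives the rotate-left amount in bytes, matching the patch's
    // (MaxElts - V0Elt) * EltSizeInBytes formula.
    static bool isRotateMask(const int *mask, unsigned n, unsigned eltBytes,
                             unsigned &rotamt) {
      int prev = -1;       // last defined mask entry seen, -1 = none yet
      unsigned v0Elt = 0;  // slot where source element 0 ends up
      for (unsigned i = 0; i != n; ++i) {
        int src = mask[i];
        if (src < 0)
          continue;                 // ignore UNDEF entries, as the patch does
        if (src >= (int)n)
          return false;             // element taken from the second vector
        if (prev >= 0 && src != prev + 1 &&
            !(prev == (int)n - 1 && src == 0))
          return false;             // not consecutive-with-wraparound
        if (src == 0)
          v0Elt = i;
        prev = src;
      }
      rotamt = (n - v0Elt) * eltBytes;
      return true;
    }

    int main() {
      // <1, 2, 3, 0> on a v4i32: element 0 lands in slot 3, so the vector
      // is rotated left by (4 - 3) * 4 = 4 bytes.
      int mask[4] = {1, 2, 3, 0};
      unsigned rotamt = 0;
      assert(isRotateMask(mask, 4, 4, rotamt) && rotamt == 4);
      printf("rotate left by %u bytes\n", rotamt);
      return 0;
    }

In the patch itself, a mask that passes this test is lowered to a single
SPUISD::ROTBYTES_LEFT node (a ROTQBY/ROTQBYI instruction on the SPU) instead
of materializing a full SHUFB control mask.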