From: Scott Michel Date: Tue, 17 Mar 2009 01:15:45 +0000 (+0000) Subject: CellSPU: X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=7ea02ffe918baff29a39981276e83b0e845ede03;p=oota-llvm.git CellSPU: - Fix fabs, fneg for f32 and f64. - Use BuildVectorSDNode.isConstantSplat, now that the functionality exists - Continue to improve i64 constant lowering. Lower certain special constants to the constant pool when they correspond to SPU's shufb instruction's special mask values. This avoids the overhead of performing a shuffle on a zero-filled vector just to get the special constant when the memory load suffices. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67067 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 73607bf2519..d1486220099 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -200,182 +200,212 @@ namespace { return retval; } -} -namespace { + //! Generate the carry-generate shuffle mask. + SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector ShufBytes; -//===--------------------------------------------------------------------===// -/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine -/// instructions for SelectionDAG operations. -/// -class SPUDAGToDAGISel : - public SelectionDAGISel -{ - SPUTargetMachine &TM; - SPUTargetLowering &SPUtli; - unsigned GlobalBaseReg; - -public: - explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), - SPUtli(*tm.getTargetLowering()) - { } - - virtual bool runOnFunction(Function &Fn) { - // Make sure we re-emit a set of the global base reg if necessary - GlobalBaseReg = 0; - SelectionDAGISel::runOnFunction(Fn); - return true; - } + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. - inline SDValue getI32Imm(uint32_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); } - /// getI64Imm - Return a target constant with the specified value, of type - /// i64. - inline SDValue getI64Imm(uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i64); + //! Generate the borrow-generate shuffle mask + SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector ShufBytes; + + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); } - /// getSmallIPtrImm - Return a target constant of pointer type. 
- inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); + //===------------------------------------------------------------------===// + /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine + /// instructions for SelectionDAG operations. + /// + class SPUDAGToDAGISel : + public SelectionDAGISel + { + SPUTargetMachine &TM; + SPUTargetLowering &SPUtli; + unsigned GlobalBaseReg; + + public: + explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), + SPUtli(*tm.getTargetLowering()) + { } + + virtual bool runOnFunction(Function &Fn) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnFunction(Fn); + return true; } - SDNode *emitBuildVector(SDValue build_vec) { - MVT vecVT = build_vec.getValueType(); - SDNode *bvNode = build_vec.getNode(); - DebugLoc dl = bvNode->getDebugLoc(); - - // Check to see if this vector can be represented as a CellSPU immediate - // constant by invoking all of the instruction selection predicates: - if (((vecVT == MVT::v8i16) && - (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || - ((vecVT == MVT::v4i32) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || - ((vecVT == MVT::v2i64) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(build_vec); - - // No, need to emit a constant pool spill: - std::vector CV; - - for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast (build_vec.getOperand(i)); - CV.push_back(const_cast (V->getConstantIntValue())); + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(uint32_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); } - Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); - unsigned Alignment = cast(CPIdx)->getAlignment(); - SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, - SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl, - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment)); - } + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } - /// Select - Convert the specified operand from a target-independent to a - /// target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); - - //! Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); - - //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); - - //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); - - //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDValue &Op, MVT OpVT); - - //! Returns true if the address N is an A-form (local store) address - bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, - SDValue &Index); - - //! 
D-form address predicate - bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// Alternate D-form address using i7 offset predicate - bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, - SDValue &Base); - - /// D-form address selection workhorse - bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp, - SDValue &Base, int minOffset, int maxOffset); - - //! Address predicate if N can be expressed as an indexed [r+r] operation. - bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) { - SDValue Op0, Op1; - switch (ConstraintCode) { - default: return true; - case 'm': // memory - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) - SelectXFormAddr(Op, Op, Op0, Op1); - break; - case 'o': // offsetable - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) { - Op0 = Op; - Op1 = getSmallIPtrImm(0); + /// getSmallIPtrImm - Return a target constant of pointer type. + inline SDValue getSmallIPtrImm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); } - break; - case 'v': // not offsetable + + SDNode *emitBuildVector(SDValue build_vec) { + MVT vecVT = build_vec.getValueType(); + MVT eltVT = vecVT.getVectorElementType(); + SDNode *bvNode = build_vec.getNode(); + DebugLoc dl = bvNode->getDebugLoc(); + + // Check to see if this vector can be represented as a CellSPU immediate + // constant by invoking all of the instruction selection predicates: + if (((vecVT == MVT::v8i16) && + (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || + ((vecVT == MVT::v4i32) && + ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || + ((vecVT == MVT::v2i64) && + ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || + (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) + return Select(build_vec); + + // No, need to emit a constant pool spill: + std::vector CV; + + for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { + ConstantSDNode *V = dyn_cast (build_vec.getOperand(i)); + CV.push_back(const_cast (V->getConstantIntValue())); + } + + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); + unsigned Alignment = cast(CPIdx)->getAlignment(); + SDValue CGPoolOffset = + SPU::LowerConstantPool(CPIdx, *CurDAG, + SPUtli.getSPUTargetMachine()); + return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl, + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment)); + } + + /// Select - Convert the specified operand from a target-independent to a + /// target-specific node if it hasn't already been changed. + SDNode *Select(SDValue Op); + + //! Emit the instruction sequence for i64 shl + SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); + + //! Emit the instruction sequence for i64 srl + SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); + + //! Emit the instruction sequence for i64 sra + SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); + + //! 
Emit the necessary sequence for loading i64 constants: + SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl); + + //! Alternate instruction emit sequence for loading i64 constants + SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl); + + //! Returns true if the address N is an A-form (local store) address + bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index); + + //! D-form address predicate + bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index); + + /// Alternate D-form address using i7 offset predicate + bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base); + + /// D-form address selection workhorse + bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base, int minOffset, int maxOffset); + + //! Address predicate if N can be expressed as an indexed [r+r] operation. + bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) + SelectXFormAddr(Op, Op, Op0, Op1); + break; + case 'o': // offsetable + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) { + Op0 = Op; + Op1 = getSmallIPtrImm(0); + } + break; + case 'v': // not offsetable #if 1 - assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); + assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); #else - SelectAddrIdxOnly(Op, Op, Op0, Op1); + SelectAddrIdxOnly(Op, Op, Op0, Op1); #endif - break; - } + break; + } - OutOps.push_back(Op0); - OutOps.push_back(Op1); - return false; - } + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); + /// InstructionSelect - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelect(); - virtual const char *getPassName() const { - return "Cell SPU DAG->DAG Pattern Instruction Selection"; - } + virtual const char *getPassName() const { + return "Cell SPU DAG->DAG Pattern Instruction Selection"; + } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new SPUHazardRecognizer(*II); - } + /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for + /// this target when scheduling the DAG. + virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { + const TargetInstrInfo *II = TM.getInstrInfo(); + assert(II && "No InstrInfo?"); + return new SPUHazardRecognizer(*II); + } - // Include the pieces autogenerated from the target description. + // Include the pieces autogenerated from the target description. #include "SPUGenDAGISel.inc" -}; - + }; } /// InstructionSelect - This callback is invoked by @@ -689,7 +719,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { // Catch the i64 constants that end up here. 
Note: The backend doesn't // attempt to legalize the constant (it's useless because DAGCombiner // will insert 64-bit constants and we can't stop it). - return SelectI64Constant(Op, OpVT); + return SelectI64Constant(Op, OpVT, Op.getDebugLoc()); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { SDValue Op0 = Op.getOperand(0); @@ -747,21 +777,21 @@ SPUDAGToDAGISel::Select(SDValue Op) { zextShuffle)); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl)); return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, Op.getOperand(0), Op.getOperand(1), @@ -813,6 +843,54 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (OpVT == MVT::i64) { return SelectSRAi64(Op, OpVT); } + } else if (Opc == ISD::FNEG + && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { + DebugLoc dl = Op.getDebugLoc(); + // Check if the pattern is a special form of DFNMS: + // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) + SDValue Op0 = Op.getOperand(0); + if (Op0.getOpcode() == ISD::FSUB) { + SDValue Op00 = Op0.getOperand(0); + if (Op00.getOpcode() == ISD::FMUL) { + unsigned Opc = SPU::DFNMSf64; + if (OpVT == MVT::v2f64) + Opc = SPU::DFNMSv2f64; + + return CurDAG->getTargetNode(Opc, dl, OpVT, + Op00.getOperand(0), + Op00.getOperand(1), + Op0.getOperand(1)); + } + } + + SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64); + SDNode *signMask = 0; + unsigned Opc = SPU::ORfneg64; + + if (OpVT == MVT::f64) { + signMask = SelectI64Constant(negConst, MVT::i64, dl); + } else if (OpVT == MVT::v2f64) { + Opc = SPU::ORfnegvec; + signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, + MVT::v2i64, + negConst, negConst)); + } + + return CurDAG->getTargetNode(Opc, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); + } else if (Opc == ISD::FABS) { + if (OpVT == MVT::f64) { + SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); + return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); + } else if (OpVT == MVT::v2f64) { + SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); + SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + absConst, absConst); + SDNode *signMask = emitBuildVector(absVec); + return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); + } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT MVT VT = N->getValueType(0); @@ -1087,13 +1165,17 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { /*! 
Do the necessary magic necessary to load a i64 constant */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) { +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT, + DebugLoc dl) { ConstantSDNode *CN = cast(Op.getNode()); - // Currently there's no DL on the input, but won't hurt to pretend. - DebugLoc dl = Op.getDebugLoc(); + return SelectI64Constant(CN->getZExtValue(), OpVT, dl); +} + +SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT, + DebugLoc dl) { MVT OpVecVT = MVT::getVectorVT(OpVT, 2); SDValue i64vec = - SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue(), dl); + SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl); // Here's where it gets interesting, because we have to parse out the // subtree handed back in i64vec: @@ -1143,8 +1225,11 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) { SDValue(lhsNode, 0), SDValue(rhsNode, 0), SDValue(shufMaskNode, 0))); - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, + return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(shufNode, 0)); + } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { + return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, + SDValue(emitBuildVector(i64vec), 0)); } else { cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n"; abort(); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e840ee0747f..43248dd7bec 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1,5 +1,5 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -1353,7 +1353,7 @@ getVecImm(SDNode *N) { } } - return 0; // All UNDEF: use implicit def.; not Constant node + return 0; } /// get_vec_i18imm - Test if this vector is a vector filled with the same value @@ -1480,131 +1480,30 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// If this is a vector of constants or undefs, get the bits. A bit in -// UndefBits is set if the corresponding element of the vector is an -// ISD::UNDEF value. For undefs, the corresponding VectorBits values are -// zero. Return true if this is not an array of constants, false if it is. -// -static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], - uint64_t UndefBits[2]) { - // Start with zero'd results. - VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; - - unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue OpVal = BV->getOperand(i); - - unsigned PartNo = i >= e/2; // In the upper 128 bits? - unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. - - uint64_t EltBits = 0; - if (OpVal.getOpcode() == ISD::UNDEF) { - uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); - UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); - continue; - } else if (ConstantSDNode *CN = dyn_cast(OpVal)) { - EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize)); - } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { - const APFloat &apf = CN->getValueAPF(); - EltBits = (CN->getValueType(0) == MVT::f32 - ? FloatToBits(apf.convertToFloat()) - : DoubleToBits(apf.convertToDouble())); - } else { - // Nonconstant element. 
- return true; - } - - VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); - } - - //printf("%llx %llx %llx %llx\n", - // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); - return false; -} - -/// If this is a splat (repetition) of a value across the whole vector, return -/// the smallest size that splats it. For example, "0x01010101010101..." is a -/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and -/// SplatSize = 1 byte. -static bool isConstantSplat(const uint64_t Bits128[2], - const uint64_t Undef128[2], - int MinSplatBits, - uint64_t &SplatBits, uint64_t &SplatUndef, - int &SplatSize) { - // Don't let undefs prevent splats from matching. See if the top 64-bits are - // the same as the lower 64-bits, ignoring undefs. - uint64_t Bits64 = Bits128[0] | Bits128[1]; - uint64_t Undef64 = Undef128[0] & Undef128[1]; - uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); - uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); - uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); - uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); - - if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { - if (MinSplatBits < 64) { - - // Check that the top 32-bits are the same as the lower 32-bits, ignoring - // undefs. - if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { - if (MinSplatBits < 32) { - - // If the top 16-bits are different than the lower 16-bits, ignoring - // undefs, we have an i32 splat. - if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { - if (MinSplatBits < 16) { - // If the top 8-bits are different than the lower 8-bits, ignoring - // undefs, we have an i16 splat. - if ((Bits16 & (uint16_t(~Undef16) >> 8)) - == ((Bits16 >> 8) & ~Undef16)) { - // Otherwise, we have an 8-bit splat. - SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); - SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); - SplatSize = 1; - return true; - } - } else { - SplatBits = Bits16; - SplatUndef = Undef16; - SplatSize = 2; - return true; - } - } - } else { - SplatBits = Bits32; - SplatUndef = Undef32; - SplatSize = 4; - return true; - } - } - } else { - SplatBits = Bits128[0]; - SplatUndef = Undef128[0]; - SplatSize = 8; - return true; - } - } - - return false; // Can't be a splat if two pieces don't match. -} - //! Lower a BUILD_VECTOR instruction creatively: SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); - // If this is a vector of constants or undefs, get the bits. A bit in - // UndefBits is set if the corresponding element of the vector is an - // ISD::UNDEF value. For undefs, the corresponding VectorBits values are - // zero. - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits) - || !isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) - return SDValue(); // Not a constant vector, not a splat. 
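(Aside, not part of the patch: the hand-rolled scan deleted above answers "what is the smallest repeating element of this 128-bit constant?", which is exactly the question BuildVectorSDNode::isConstantSplat, used in the replacement code below, now answers. A minimal sketch of that test, ignoring the undef tracking that both the old and new versions carry; the function name is illustrative, not an LLVM API:

    #include <cstdint>

    // Smallest element width (in bits) that splats a 128-bit constant given
    // as two 64-bit halves, or 0 if the halves disagree.
    // 0x0101...01 -> 8, 0x00010001...0001 -> 16, and so on.
    static unsigned smallestSplatBits(uint64_t Hi, uint64_t Lo) {
      if (Hi != Lo)
        return 0;                                // not even a 64-bit splat
      unsigned Bits = 64;
      uint64_t Val = Lo;
      while (Bits > 8) {
        uint64_t Mask = ~0ULL >> (64 - Bits / 2);
        if ((Val & Mask) != ((Val >> (Bits / 2)) & Mask))
          break;                                 // halves differ: stop halving
        Bits /= 2;
        Val &= Mask;
      }
      return Bits;
    }

For example, smallestSplatBits(0x0101010101010101ULL, 0x0101010101010101ULL) returns 8, the SplatSize = 1 case handled by the removed code above.)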
+ BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); + unsigned minSplatBits = EltVT.getSizeInBits(); + + if (minSplatBits < 16) + minSplatBits = 16; + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + || minSplatBits < SplatBitSize) + return SDValue(); // Wasn't a constant vector or splat exceeded min + + uint64_t SplatBits = APSplatBits.getZExtValue(); + unsigned SplatSize = SplatBitSize / 8; switch (VT.getSimpleVT()) { default: @@ -1620,8 +1519,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(Value32, MVT::i32); return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v4i32, T, T, T, T)); + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T, T, T, T)); break; } case MVT::v2f64: { @@ -1636,45 +1534,42 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } case MVT::v16i8: { // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits | (SplatBits << 8); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) - Ops[i] = DAG.getConstant(Value16, MVT::i16); + unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; + SmallVector Ops; + + Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, Ops, 8)); + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); } case MVT::v8i16: { - unsigned short Value16; - if (SplatSize == 2) - Value16 = (unsigned short) (SplatBits & 0xffff); - else - Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); - SDValue T = DAG.getConstant(Value16, VT.getVectorElementType()); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) Ops[i] = T; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops, 8); + unsigned short Value16 = SplatBits; + SDValue T = DAG.getConstant(Value16, EltVT); + SmallVector Ops; + + Ops.assign(8, T); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); } case MVT::v4i32: { - unsigned int Value = SplatBits; - SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } case MVT::v2i32: { - unsigned int Value = SplatBits; - SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); } case MVT::v2i64: { - return SPU::LowerSplat_v2i64(VT, DAG, SplatBits, dl); + return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); } } return SDValue(); } +/*! 
+ */ SDValue -SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, - DebugLoc dl) { +SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, + DebugLoc dl) { uint32_t upper = uint32_t(SplatVal >> 32); uint32_t lower = uint32_t(SplatVal); @@ -1685,10 +1580,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Val, Val, Val, Val)); } else { - SDValue LO32; - SDValue HI32; - SmallVector ShufBytes; - SDValue Result; bool upper_special, lower_special; // NOTE: This code creates common-case shuffle masks that can be easily @@ -1699,6 +1590,18 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + // Both upper and lower are special, lower to a constant pool load: + if (lower_special && upper_special) { + SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + SplatValCN, SplatValCN); + } + + SDValue LO32; + SDValue HI32; + SmallVector ShufBytes; + SDValue Result; + // Create lower vector if not a special pattern if (!lower_special) { SDValue LO32C = DAG.getConstant(lower, MVT::i32); @@ -1721,13 +1624,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, LO32 = HI32; if (upper_special) HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... both upper and lower are special, so punt with - // a target constant: - SDValue Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Zero, Zero, - Zero, Zero); - } for (int i = 0; i < 4; ++i) { uint64_t val = 0; @@ -2022,9 +1918,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { ShufMask[i] = DAG.getConstant(bits, MVT::i32); } - SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufMask[0], - sizeof(ShufMask) / sizeof(ShufMask[0])); + SDValue ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), @@ -2067,28 +1963,28 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { /*NOTREACHED*/ case MVT::i8: { SDValue factor = DAG.getConstant(0x00000000, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i16: { SDValue factor = DAG.getConstant(0x00010001, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i32: case MVT::f32: { SDValue factor = DAG.getConstant(0x00010203, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i64: case MVT::f64: { SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, loFactor, hiFactor, loFactor, hiFactor); break; } 
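For reference, the 0x80808080 words in the carry/borrow masks and the 0x00010203/0x04050607 replicate factors above are shufb control bytes. A host-side model of one result byte of shufb, assuming the byte-selection rules from the SPU ISA; explanatory code only, not part of the patch:

    #include <cstdint>

    // Control bytes 0b10xxxxxx produce 0x00, 0b110xxxxx produce 0xFF, and
    // 0b111xxxxx produce 0x80; everything else indexes the 32-byte pool A||B.
    static uint8_t shufbByte(const uint8_t A[16], const uint8_t B[16],
                             uint8_t C) {
      if ((C & 0xC0) == 0x80) return 0x00;   // the 0x80808080 mask words
      if ((C & 0xE0) == 0xC0) return 0xFF;   // the 0xC0C0C0C0 mask words
      if ((C & 0xE0) == 0xE0) return 0x80;   // sign-bit byte
      unsigned Idx = C & 0x1F;
      return Idx < 16 ? A[Idx] : B[Idx - 16];
    }

These generated bytes are the "special mask values" the commit message refers to: shufb can produce 0x00, 0xFF, and 0x80 without any source operand supplying them.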
@@ -2164,71 +2060,65 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, case ISD::ROTR: case ISD::ROTL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(ShiftVT) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, dl, ShiftVT, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - TLI.getShiftAmountTy())); + MVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) + ? ISD::ZERO_EXTEND + : ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + // Replicate lower 8-bits into upper 8: SDValue ExpandArg = DAG.getNode(ISD::OR, dl, MVT::i16, N0, DAG.getNode(ISD::SHL, dl, MVT::i16, N0, DAG.getConstant(8, MVT::i32))); + + // Truncate back down to i8 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); } case ISD::SRL: case ISD::SHL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i32)); - N1Opc = N1.getValueType().bitsLT(ShiftVT) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, dl, ShiftVT, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), ShiftVT)); + MVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::ZERO_EXTEND; + + if (N1.getValueType().bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } case ISD::SRA: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getSExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(ShiftVT) - ? ISD::SIGN_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, dl, ShiftVT, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - ShiftVT)); + MVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::SIGN_EXTEND; + + if (N1VT.bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } case ISD::MUL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, dl, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getSExtValue(), - MVT::i16)); + + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getNode(Opc, dl, MVT::i16, N0, N1)); break; @@ -2238,36 +2128,6 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, return SDValue(); } -//! 
Generate the carry-generate shuffle mask. -SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size()); -} - -//! Generate the borrow-generate shuffle mask -SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size()); -} - //! Lower byte immediate operations for v16i8 vectors: static SDValue LowerByteImmed(SDValue Op, SelectionDAG &DAG) { @@ -2291,26 +2151,24 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { } if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - - if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits) - && isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) { - SDValue tcVec[16]; + BuildVectorSDNode *BCN = dyn_cast(ConstVec.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); + + if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + && minSplatBits <= SplatBitSize) { + uint64_t SplatBits = APSplatBits.getZExtValue(); SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]); - - // Turn the BUILD_VECTOR into a set of target constants: - for (size_t i = 0; i < tcVecSize; ++i) - tcVec[i] = tc; + SmallVector tcVec; + tcVec.assign(16, tc); return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - tcVec, tcVecSize)); + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); } } @@ -2452,7 +2310,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); } - return Op; // return unmolested, legalized op + return SDValue(); } //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 @@ -2478,7 +2336,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); } - return Op; // return unmolested, legalized + return SDValue(); } //! 
Lower ISD::SETCC diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 9f1d9558c17..866c632d527 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -78,11 +78,9 @@ namespace llvm { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM); - SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat, + //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form + SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat, DebugLoc dl); - - SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl); - SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl); } class SPUTargetMachine; // forward dec'l. diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 8623f30112e..135164f3d96 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -60,9 +60,6 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, unsigned& SrcSR, unsigned& DstSR) const { SrcSR = DstSR = 0; // No sub-registers. - // Primarily, ORI and OR are generated by copyRegToReg. But, there are other - // cases where we can safely say that what's being done is really a move - // (see how PowerPC does this -- it's the model for this code too.) switch (MI.getOpcode()) { default: break; @@ -167,7 +164,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, MI.getOperand(1).isReg() && "invalid SPU OR_ or LR instruction!"); if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { - sourceReg = MI.getOperand(0).getReg(); + sourceReg = MI.getOperand(1).getReg(); destReg = MI.getOperand(0).getReg(); return true; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index e1d9228ef93..86eb61cf86c 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1258,10 +1258,9 @@ multiclass BitwiseAnd def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), [/* Intentionally does not match a pattern */]>; - def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB), + def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), [/* Intentionally does not match a pattern */]>; - // Could use v4i32, but won't for clarity def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* Intentionally does not match a pattern */]>; @@ -1288,10 +1287,11 @@ class ANDCInst pattern>: RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB", IntegerOp, pattern>; -class ANDCVecInst: +class ANDCVecInst: ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA), - (vnot (vectype VECREG:$rB))))]>; + [(set (vectype VECREG:$rT), + (and (vectype VECREG:$rA), + (vnot_frag (vectype VECREG:$rB))))]>; class ANDCRegInst: ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), @@ -1309,6 +1309,9 @@ multiclass AndComplement def r32: ANDCRegInst; def r16: ANDCRegInst; def r8: ANDCRegInst; + + // Sometimes, the xor pattern has a bitcast constant: + def v16i8_conv: ANDCVecInst; } defm ANDC : AndComplement; @@ -1480,6 +1483,17 @@ multiclass BitwiseOr def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), [/* no pattern */]>; + // OR instructions used to negate f32 and f64 quantities. 
+ + def fneg32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), + [/* no pattern */]>; + + def fneg64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), + [/* no pattern */]>; + + def fnegvec: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern, see fneg{32,64} */]>; + // scalar->vector promotion, prefslot2vec: def v16i8_i8: ORPromoteScalar; def v8i16_i16: ORPromoteScalar; @@ -1783,18 +1797,6 @@ multiclass BitwiseExclusiveOr def r32: XORRegInst; def r16: XORRegInst; def r8: XORRegInst; - - // Special forms for floating point instructions. - // fneg and fabs require bitwise logical ops to manipulate the sign bit. - - def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), - [/* no pattern */]>; - - def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB), - [/* no pattern */]>; - - def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern, see fneg{32,64} */]>; } defm XOR : BitwiseExclusiveOr; @@ -4239,33 +4241,36 @@ def FMSv2f64 : (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)), (v2f64 VECREG:$rC)))]>; -// FNMS: - (a * b - c) +// DFNMS: - (a * b - c) // - (a * b) + c => c - (a * b) -def FNMSf64 : - RRForm<0b01111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfnms\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fsub R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>, + +class DFNMSInst pattern>: + RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB", + DPrecFP, pattern>, RegConstraint<"$rC = $rT">, NoEncode<"$rC">; -def : Pat<(fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)), - (FNMSf64 R64FP:$rA, R64FP:$rB, R64FP:$rC)>; +class DFNMSVecInst pattern>: + DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + pattern>; -def FNMSv2f64 : - RRForm<0b01111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfnms\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fsub (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; +class DFNMSRegInst pattern>: + DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + pattern>; -def : Pat<(fneg (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)), - (v2f64 VECREG:$rC))), - (FNMSv2f64 VECREG:$rA, VECREG:$rB, VECREG:$rC)>; +multiclass DFMultiplySubtract +{ + def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT), + (fsub (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB))))]>; + + def f64 : DFNMSRegInst<[(set R64FP:$rT, + (fsub R64FP:$rC, + (fmul R64FP:$rA, R64FP:$rB)))]>; +} + +defm DFNMS : DFMultiplySubtract; // - (a * b + c) // - (a * b) - c @@ -4293,35 +4298,21 @@ def FNMAv2f64 : //===----------------------------------------------------------------------==// def : Pat<(fneg (v4f32 VECREG:$rA)), - (XORfnegvec (v4f32 VECREG:$rA), - (v4f32 (ILHUv4i32 0x8000)))>; + (ORfnegvec (v4f32 VECREG:$rA), + (v4f32 (ILHUv4i32 0x8000)))>; def : Pat<(fneg R32FP:$rA), - (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>; - -def : Pat<(fneg (v2f64 VECREG:$rA)), - (XORfnegvec (v2f64 VECREG:$rA), - (v2f64 (ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80)))>; - -def : Pat<(fneg R64FP:$rA), - (XORfneg64 R64FP:$rA, - (ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80))>; + (ORfneg32 R32FP:$rA, (ILHUr32 0x8000))>; // Floating point absolute value +// Note: f64 fabs is custom-selected. 
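The f64 case noted above is handled in the SPUISelDAGToDAG.cpp hunk earlier: FABS is selected to ANDfabs64 with a 0x7fffffffffffffff mask materialized by SelectI64Constant, while the f32 and v4f32 patterns remain here, below. For intuition, the same sign-bit clear on a host value, assuming IEEE-754 binary64 layout; illustrative code, not part of the patch:

    #include <cstdint>
    #include <cstring>

    // fabs as the selector implements it: AND away the sign bit (bit 63).
    // ANDfabs64 does this in a register; this is the host-side equivalent.
    static double fabsViaSignMask(double X) {
      uint64_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits));   // well-defined type pun
      Bits &= 0x7fffffffffffffffULL;          // clear the sign bit
      std::memcpy(&X, &Bits, sizeof(Bits));
      return X;
    }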
def : Pat<(fabs R32FP:$rA), (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>; def : Pat<(fabs (v4f32 VECREG:$rA)), (ANDfabsvec (v4f32 VECREG:$rA), - (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; - -def : Pat<(fabs R64FP:$rA), - (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>; - -def : Pat<(fabs (v2f64 VECREG:$rA)), - (ANDfabsvec (v2f64 VECREG:$rA), - (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; + (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>; //===----------------------------------------------------------------------===// // Hint for branch instructions: diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll index 3002bbc2517..75e0ed0cd2f 100644 --- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll +++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll @@ -8,11 +8,11 @@ target triple = "spu" define double @__floatunsidf(i32 %arg_a) nounwind { entry: - %in = alloca %struct.fp_number_type, align 8 ; <%struct.fp_number_type*> [#uses=5] - %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 ; [#uses=1] + %in = alloca %struct.fp_number_type, align 16 + %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 store i32 0, i32* %0, align 4 - %1 = icmp eq i32 %arg_a, 0 ; [#uses=1] - %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 ; [#uses=2] + %1 = icmp eq i32 %arg_a, 0 + %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 br i1 %1, label %bb, label %bb1 bb: ; preds = %entry @@ -26,6 +26,6 @@ bb7: ; preds = %bb5, %bb1, %bb ret double 1.0 } -declare i32 @llvm.ctlz.i32(i32) nounwind readnone +; declare i32 @llvm.ctlz.i32(i32) nounwind readnone declare double @__pack_d(%struct.fp_number_type*) diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll index b6eca10803e..e8374b3dae2 100644 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -1,9 +1,7 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep fsmbi %t1.s | count 3 ; RUN: grep 32768 %t1.s | count 2 -; RUN: grep xor %t1.s | count 4 -; RUN: grep and %t1.s | count 5 -; RUN: grep andbi %t1.s | count 3 +; RUN: grep or %t1.s | count 4 +; RUN: grep and %t1.s | count 2 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu"
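Tying the pieces together: LowerV2I64Splat now gives up early and emits a plain BUILD_VECTOR, which SelectI64Constant's new BUILD_VECTOR branch spills to the constant pool, whenever both 32-bit halves of the splat are "special". A sketch of that decision, mirroring the upper_special/lower_special tests in the patch; function names are illustrative:

    #include <cstdint>

    // A half is "special" when a single shufb control byte can produce each
    // of its bytes: all zeros (0x80 bytes), all ones (0xC0 bytes), or the
    // sign pattern 0x80000000 (0xE0 for the 0x80 byte, 0x80 for the rest).
    static bool isSpecialHalf(uint32_t Half) {
      return Half == 0 || Half == 0xffffffffU || Half == 0x80000000U;
    }

    // When both halves qualify, a 16-byte constant-pool load beats building
    // a zero-filled vector and shuffling it, which is the point of this
    // commit's constant-lowering change.
    static bool splatPrefersConstantPool(uint64_t SplatVal) {
      return isSpecialHalf(uint32_t(SplatVal >> 32)) &&
             isSpecialHalf(uint32_t(SplatVal));
    }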