From c9c8b2a804b2cd3d33a6a965e06a21ff93968f97 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Mon, 26 Jan 2009 03:31:40 +0000 Subject: [PATCH] CellSPU: - Rename fcmp.ll test to fcmp32.ll, start adding new double tests to fcmp64.ll - Fix select_bits.ll test - Capitulate to the DAGCombiner and move i64 constant loads to instruction selection (SPUISelDAGtoDAG.cpp). DAGCombiner will insert all kinds of 64-bit optimizations after operation legalization occurs and now we have to do most of the work that instruction selection should be doing twice (once to determine if v2i64 build_vector can be handled by SelectCode(), which then runs all of the predicates a second time to select the necessary instructions.) But, CellSPU is a good citizen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62990 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPU64InstrInfo.td | 4 +- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 173 +++++-- lib/Target/CellSPU/SPUISelLowering.cpp | 484 ++++++++++++-------- lib/Target/CellSPU/SPUISelLowering.h | 4 +- lib/Target/CellSPU/SPUInstrInfo.cpp | 4 +- lib/Target/CellSPU/SPUInstrInfo.td | 30 +- test/CodeGen/CellSPU/{fcmp.ll => fcmp32.ll} | 15 +- test/CodeGen/CellSPU/fcmp64.ll | 7 + test/CodeGen/CellSPU/fneg-fabs.ll | 11 +- test/CodeGen/CellSPU/select_bits.ll | 114 ++--- test/CodeGen/CellSPU/shift_ops.ll | 6 + 11 files changed, 552 insertions(+), 300 deletions(-) rename test/CodeGen/CellSPU/{fcmp.ll => fcmp32.ll} (56%) create mode 100644 test/CodeGen/CellSPU/fcmp64.ll diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 33298946c52..06eb1496def 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -30,8 +30,8 @@ // selb instruction definition for i64. Note that the selection mask is // a vector, produced by various forms of FSM: def SELBr64_cond: - SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), - [/* no pattern */]>; + SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), + [/* no pattern */]>; // The generic i64 select pattern, which assumes that the comparison result // is in a 32-bit register that contains a select mask pattern (i.e., gather diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 0fc7aec9906..6d7f40d5d75 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -254,26 +254,56 @@ public: /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); - } + } SDNode *emitBuildVector(SDValue build_vec) { + MVT vecVT = build_vec.getValueType(); + SDNode *bvNode = build_vec.getNode(); + bool canBeSelected = false; + + // Check to see if this vector can be represented as a CellSPU immediate + // constant. 
+ if (vecVT == MVT::v8i16) { + if (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0) { + canBeSelected = true; + } + } else if (vecVT == MVT::v4i32) { + if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) + || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) + || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) + || (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0)) { + canBeSelected = true; + } + } else if (vecVT == MVT::v2i64) { + if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) + || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) + || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)) { + canBeSelected = true; + } + } + + if (canBeSelected) { + return Select(build_vec); + } + + // No, need to emit a constant pool spill: std::vector CV; for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast(build_vec.getOperand(i)); - CV.push_back(const_cast(V->getConstantIntValue())); + ConstantSDNode *V = dyn_cast (build_vec.getOperand(i)); + CV.push_back(const_cast (V->getConstantIntValue())); } Constant *CP = ConstantVector::get(CV); SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); - unsigned Alignment = 1 << cast(CPIdx)->getAlignment(); + unsigned Alignment = 1 << cast (CPIdx)->getAlignment(); SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); return SelectCode(CurDAG->getLoad(build_vec.getValueType(), - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment)); + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment)); } /// Select - Convert the specified operand from a target-independent to a @@ -289,6 +319,9 @@ public: //! Emit the instruction sequence for i64 sra SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); + //! Emit the necessary sequence for loading i64 constants: + SDNode *SelectI64Constant(SDValue &Op, MVT OpVT); + //! Returns true if the address N is an A-form (local store) address bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Index); @@ -652,7 +685,9 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (N->isMachineOpcode()) { return NULL; // Already selected. - } else if (Opc == ISD::FrameIndex) { + } + + if (Opc == ISD::FrameIndex) { int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType()); @@ -669,6 +704,11 @@ SPUDAGToDAGISel::Select(SDValue Op) { TFI, Imm0), 0); n_ops = 2; } + } else if (Opc == ISD::Constant && OpVT == MVT::i64) { + // Catch the i64 constants that end up here. Note: The backend doesn't + // attempt to legalize the constant (it's useless because DAGCombiner + // will insert 64-bit constants and we can't stop it). 
+ return SelectI64Constant(Op, OpVT); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { SDValue Op0 = Op.getOperand(0); @@ -745,27 +785,38 @@ SPUDAGToDAGISel::Select(SDValue Op) { return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); - } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); - - return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); - } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG)); - - return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); - } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); - - return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + } else if (Opc == ISD::TRUNCATE) { + SDValue Op0 = Op.getOperand(0); + if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) + && OpVT == MVT::i32 + && Op0.getValueType() == MVT::i64) { + // Catch the (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32 to + // take advantage of the fact that the upper 32 bits are in the + // i32 preferred slot and avoid all kinds of other shuffle gymnastics: + ConstantSDNode *CN = dyn_cast(Op0.getOperand(1)); + if (CN != 0) { + unsigned shift_amt = unsigned(CN->getZExtValue()); + + if (shift_amt >= 32) { + SDNode *hi32 = + CurDAG->getTargetNode(SPU::ORr32_r64, OpVT, Op0.getOperand(0)); + + shift_amt -= 32; + if (shift_amt > 0) { + // Take care of the additional shift, if present: + SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32); + unsigned Opc = SPU::ROTMAIr32_i32; + + if (Op0.getOpcode() == ISD::SRL) + Opc = SPU::ROTMr32; + + hi32 = CurDAG->getTargetNode(Opc, OpVT, SDValue(hi32, 0), shift); + } + + return hi32; + } + } + } } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) { return SelectSHLi64(Op, OpVT); @@ -1046,6 +1097,70 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); } +/*! + Do the necessary magic necessary to load a i64 constant + */ +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) { + ConstantSDNode *CN = cast(Op.getNode()); + MVT OpVecVT = MVT::getVectorVT(OpVT, 2); + SDValue i64vec = + SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue()); + + // Here's where it gets interesting, because we have to parse out the + // subtree handed back in i64vec: + + if (i64vec.getOpcode() == ISD::BIT_CONVERT) { + // The degenerate case where the upper and lower bits in the splat are + // identical: + SDValue Op0 = i64vec.getOperand(0); + ReplaceUses(i64vec, Op0); + + return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, + SDValue(emitBuildVector(Op0), 0)); + } else if (i64vec.getOpcode() == SPUISD::SHUFB) { + SDValue lhs = i64vec.getOperand(0); + SDValue rhs = i64vec.getOperand(1); + SDValue shufmask = i64vec.getOperand(2); + + if (lhs.getOpcode() == ISD::BIT_CONVERT) { + ReplaceUses(lhs, lhs.getOperand(0)); + lhs = lhs.getOperand(0); + } + + SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() + ? 
lhs.getNode() + : emitBuildVector(lhs)); + + if (rhs.getOpcode() == ISD::BIT_CONVERT) { + ReplaceUses(rhs, rhs.getOperand(0)); + rhs = rhs.getOperand(0); + } + + SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() + ? rhs.getNode() + : emitBuildVector(rhs)); + + if (shufmask.getOpcode() == ISD::BIT_CONVERT) { + ReplaceUses(shufmask, shufmask.getOperand(0)); + shufmask = shufmask.getOperand(0); + } + + SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() + ? shufmask.getNode() + : emitBuildVector(shufmask)); + + SDNode *shufNode = + Select(CurDAG->getNode(SPUISD::SHUFB, OpVecVT, + SDValue(lhsNode, 0), SDValue(rhsNode, 0), + SDValue(shufMaskNode, 0))); + + return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(shufNode, 0)); + } else { + cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n"; + abort(); + } +} + /// createSPUISelDag - This pass converts a legalized DAG into a /// SPU-specific DAG, ready for instruction scheduling. /// diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 124f1a7536b..6bb76d805e3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -17,6 +17,7 @@ #include "SPUFrameInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -79,6 +80,43 @@ namespace { return retval; } + //! Expand a library call into an actual call DAG node + /*! + \note + This code is taken from SelectionDAGLegalize, since it is not exposed as + part of the LLVM SelectionDAG API. + */ + + SDValue + ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, + bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + MVT ArgVT = Op.getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForMVT(); + Entry.Node = Op.getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. 
+ const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT(); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + CallingConv::C, false, Callee, Args, DAG); + + return CallInfo.first; + } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) @@ -113,7 +151,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); // SPU constant load actions are custom lowered: - setOperationAction(ISD::Constant, MVT::i64, Custom); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -128,10 +165,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, Custom); - // SMUL_LOHI, UMUL_LOHI are not legal for Cell: - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { MVT StoreVT = (MVT::SimpleValueType) stype; setTruncStoreAction(VT, StoreVT, Expand); @@ -179,16 +212,14 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); - // If we're enabling GP optimizations, use hardware square root + // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt + // for f32!) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - // Make sure that DAGCombine doesn't insert illegal 64-bit constants - setOperationAction(ISD::FABS, MVT::f64, Custom); - // SPU can do rotate right and left, so legalize it... but customize for i8 // because instructions don't exist. 
@@ -254,22 +285,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); - // SPU has a legal FP -> signed INT instruction - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + // SPU has a legal FP -> signed INT instruction for f32, but for f64, need + // to expand to a libcall, hence the custom lowering: + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall + setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall // SPU has [U|S]INT_TO_FP - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); @@ -338,24 +368,23 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) MVT VT = (MVT::SimpleValueType)i; // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); // mul has to be custom lowered. - // TODO: v2i64 vector multiply - setOperationAction(ISD::MUL , VT, Legal); + setOperationAction(ISD::MUL, VT, Legal); - setOperationAction(ISD::AND , VT, Legal); - setOperationAction(ISD::OR , VT, Legal); - setOperationAction(ISD::XOR , VT, Legal); - setOperationAction(ISD::LOAD , VT, Legal); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::SELECT, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); // Custom lower build_vector, constant pool spills, insert and // extract vector elements: @@ -866,31 +895,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return SDValue(); } -//! Custom lower i64 integer constants -/*! - This code inserts all of the necessary juggling that needs to occur to load - a 64-bit constant into a register. 
- */ -static SDValue -LowerConstant(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - - if (VT == MVT::i64) { - ConstantSDNode *CN = cast(Op.getNode()); - SDValue T = DAG.getConstant(CN->getZExtValue(), VT); - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); - } else { - cerr << "LowerConstant: unhandled constant type " - << VT.getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - } - - return SDValue(); -} - //! Custom lower double precision floating point constants static SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG) { @@ -1564,7 +1568,7 @@ static bool isConstantSplat(const uint64_t Bits128[2], //! Lower a BUILD_VECTOR instruction creatively: SDValue -SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); // If this is a vector of constants or undefs, get the bits. A bit in // UndefBits is set if the corresponding element of the vector is an @@ -1588,7 +1592,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { abort(); /*NOTREACHED*/ case MVT::v4f32: { - uint32_t Value32 = SplatBits; + uint32_t Value32 = uint32_t(SplatBits); assert(SplatSize == 4 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants @@ -1598,7 +1602,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { break; } case MVT::v2f64: { - uint64_t f64val = SplatBits; + uint64_t f64val = uint64_t(SplatBits); assert(SplatSize == 8 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants @@ -1638,93 +1642,99 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T); } case MVT::v2i64: { - uint64_t val = SplatBits; - uint32_t upper = uint32_t(val >> 32); - uint32_t lower = uint32_t(val); - - if (upper == lower) { - // Magic constant that can be matched by IL, ILA, et. al. - SDValue Val = DAG.getTargetConstant(val, MVT::i64); - return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val); - } else { - SDValue LO32; - SDValue HI32; - SmallVector ShufBytes; - SDValue Result; - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
- - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000); - lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000); - - // Create lower vector if not a special pattern - if (!lower_special) { - SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } + return SPU::LowerSplat_v2i64(VT, DAG, SplatBits); + } + } - // Create upper vector if not a special pattern - if (!upper_special) { - SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } + return SDValue(); +} - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... both upper and lower are special, so punt with - // a target constant: - SDValue Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, - Zero, Zero); - } +SDValue +SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) { + uint32_t upper = uint32_t(SplatVal >> 32); + uint32_t lower = uint32_t(SplatVal); + + if (upper == lower) { + // Magic constant that can be matched by IL, ILA, et. al. + SDValue Val = DAG.getTargetConstant(upper, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + Val, Val, Val, Val)); + } else { + SDValue LO32; + SDValue HI32; + SmallVector ShufBytes; + SDValue Result; + bool upper_special, lower_special; + + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. + + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Create lower vector if not a special pattern + if (!lower_special) { + SDValue LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } - for (int i = 0; i < 4; ++i) { - uint64_t val = 0; - for (int j = 0; j < 4; ++j) { - SDValue V; - bool process_upper, process_lower; - val <<= 8; - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val |= 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val |= 0xc0; - else if ((process_upper && upper == 0x80000000) - || (process_lower && lower == 0x80000000)) - val |= (j == 0 ? 
0xe0 : 0x80); - } else - val |= i * 4 + j + ((i & 1) * 16); - } + // Create upper vector if not a special pattern + if (!upper_special) { + SDValue HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } - ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + if (lower_special && upper_special) { + // Unhappy situation... both upper and lower are special, so punt with + // a target constant: + SDValue Zero = DAG.getConstant(0, MVT::i32); + HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, + Zero, Zero); + } + + for (int i = 0; i < 4; ++i) { + uint64_t val = 0; + for (int j = 0; j < 4; ++j) { + SDValue V; + bool process_upper, process_lower; + val <<= 8; + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val |= 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val |= 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val |= (j == 0 ? 0xe0 : 0x80); + } else + val |= i * 4 + j + ((i & 1) * 16); } - return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); + ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); } - } - } - return SDValue(); + return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + &ShufBytes[0], ShufBytes.size())); + } } /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on @@ -2384,81 +2394,180 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -//! Lower ISD::FABS +//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 /*! - DAGCombine does the same basic reduction: convert the double to i64 and mask - off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which - CellSPU has to legalize. Hence, the custom lowering. + f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. + All conversions to i64 are expanded to a libcall. */ - -static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + SPUTargetLowering &TLI) { MVT OpVT = Op.getValueType(); - MVT IntVT(MVT::i64); SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType(); + + if ((OpVT == MVT::i32 && Op0VT == MVT::f64) + || OpVT == MVT::i64) { + // Convert f32 / f64 to i32 / i64 via libcall. + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::FP_TO_SINT) + ? RTLIB::getFPTOSINT(Op0VT, OpVT) + : RTLIB::getFPTOUINT(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } + + return SDValue(); +} - assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n"); +//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 +/*! + i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. + All conversions from i64 are expanded to a libcall. 
+ */ +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + SPUTargetLowering &TLI) { + MVT OpVT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType(); - SDValue iABS = - DAG.getNode(ISD::AND, IntVT, - DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0), - DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT)); + if ((OpVT == MVT::f64 && Op0VT == MVT::i32) + || Op0VT == MVT::i64) { + // Convert i32, i64 to f64 via libcall: + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::SINT_TO_FP) + ? RTLIB::getSINTTOFP(Op0VT, OpVT) + : RTLIB::getUINTTOFP(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } - return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS); + return SDValue(); } //! Lower ISD::SETCC /*! This handles MVT::f64 (double floating point) condition lowering */ - static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + CondCodeSDNode *CC = dyn_cast(Op.getOperand(2)); + assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); + SDValue lhs = Op.getOperand(0); SDValue rhs = Op.getOperand(1); - CondCodeSDNode *CC = dyn_cast (Op.getOperand(2)); MVT lhsVT = lhs.getValueType(); - SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64); - - assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); - switch (CC->get()) { - case ISD::SETOEQ: - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETOLT: - case ISD::SETOLE: - case ISD::SETONE: - cerr << "CellSPU ISel Select: unimplemented f64 condition\n"; - abort(); - break; - case ISD::SETO: { - SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs); - SDValue i64lhs = - DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs); + MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); + APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + MVT IntVT(MVT::i64); - return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT); - } - case ISD::SETUO: { - SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs); - SDValue i64lhs = - DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs); + // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently + // selected to a NOP: + SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs); + SDValue lhsHi32 = + DAG.getNode(ISD::TRUNCATE, MVT::i32, + DAG.getNode(ISD::SRL, IntVT, + i64lhs, DAG.getConstant(32, MVT::i32))); + SDValue lhsHi32abs = + DAG.getNode(ISD::AND, MVT::i32, + lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); + SDValue lhsLo32 = + DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs); + + // SETO and SETUO only use the lhs operand: + if (CC->get() == ISD::SETO) { + // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of + // SETUO + APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, ccResultVT, + DAG.getSetCC(ccResultVT, + lhs, DAG.getConstantFP(0.0, lhsVT), + ISD::SETUO), + DAG.getConstant(ccResultAllOnes, ccResultVT)); + } else if (CC->get() == ISD::SETUO) { + // Evaluates to true if Op0 is [SQ]NaN + return DAG.getNode(ISD::AND, ccResultVT, + DAG.getSetCC(ccResultVT, + lhsHi32abs, + DAG.getConstant(0x7ff00000, MVT::i32), + ISD::SETGE), + DAG.getSetCC(ccResultVT, + lhsLo32, + DAG.getConstant(0, MVT::i32), + ISD::SETGT)); + } + + SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs); + SDValue rhsHi32 = + 
DAG.getNode(ISD::TRUNCATE, MVT::i32, + DAG.getNode(ISD::SRL, IntVT, + i64rhs, DAG.getConstant(32, MVT::i32))); + + // If a value is negative, subtract from the sign magnitude constant: + SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); + + // Convert the sign-magnitude representation into 2's complement: + SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT, + lhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs); + SDValue lhsSelect = + DAG.getNode(ISD::SELECT, IntVT, + lhsSelectMask, lhsSignMag2TC, i64lhs); + + SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT, + rhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs); + SDValue rhsSelect = + DAG.getNode(ISD::SELECT, IntVT, + rhsSelectMask, rhsSignMag2TC, i64rhs); + + unsigned compareOp; - return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE); - } + switch (CC->get()) { + case ISD::SETOEQ: case ISD::SETUEQ: + compareOp = ISD::SETEQ; break; + case ISD::SETOGT: case ISD::SETUGT: + compareOp = ISD::SETGT; break; + case ISD::SETOGE: case ISD::SETUGE: + compareOp = ISD::SETGE; break; + case ISD::SETOLT: case ISD::SETULT: + compareOp = ISD::SETLT; break; + case ISD::SETOLE: case ISD::SETULE: + compareOp = ISD::SETLE; break; case ISD::SETUNE: + case ISD::SETONE: + compareOp = ISD::SETNE; break; default: cerr << "CellSPU ISel Select: unimplemented f64 condition\n"; abort(); break; } - return SDValue(); + SDValue result = + DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp); + + if ((CC->get() & 0x8) == 0) { + // Ordered comparison: + SDValue lhsNaN = DAG.getSetCC(ccResultVT, + lhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue rhsNaN = DAG.getSetCC(ccResultVT, + rhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN); + + result = DAG.getNode(ISD::AND, ccResultVT, ordered, result); + } + + return result; } //! Lower ISD::SELECT_CC @@ -2566,8 +2675,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::JumpTable: return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::Constant: - return LowerConstant(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG); case ISD::FORMAL_ARGUMENTS: @@ -2590,12 +2697,17 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) break; } - case ISD::FABS: - return LowerFABS(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG, *this); + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return LowerINT_TO_FP(Op, DAG, *this); // Vector-related lowering. case ISD::BUILD_VECTOR: - return SPU::LowerBUILD_VECTOR(Op, DAG); + return LowerBUILD_VECTOR(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 079f3ba69ef..24c2803fe65 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -61,7 +61,7 @@ namespace llvm { }; } - //! Utility functions specific to CellSPU-only: + //! 
Utility functions specific to CellSPU: namespace SPU { SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, MVT ValueType); @@ -78,7 +78,7 @@ namespace llvm { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat); SDValue getBorrowGenerateShufMask(SelectionDAG &DAG); SDValue getCarryGenerateShufMask(SelectionDAG &DAG); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 91d52facada..f35a42d71a9 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -155,13 +155,13 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORr8_r32: case SPU::ORr32_r16: case SPU::ORr32_r8: - case SPU::ORr32_r64: case SPU::ORr16_r64: case SPU::ORr8_r64: - case SPU::ORr64_r32: case SPU::ORr64_r16: case SPU::ORr64_r8: */ + case SPU::ORr64_r32: + case SPU::ORr32_r64: case SPU::ORf32_r32: case SPU::ORr32_f32: case SPU::ORf64_r64: diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 2834a1eb8d9..8db2fa7fced 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1259,6 +1259,9 @@ multiclass BitwiseAnd def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), [/* Intentionally does not match a pattern */]>; + def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB), + [/* Intentionally does not match a pattern */]>; + // Could use v4i32, but won't for clarity def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* Intentionally does not match a pattern */]>; @@ -1525,17 +1528,17 @@ multiclass BitwiseOr // Conversion from R32C to register def r32_r16: ORCvtFormR32Reg; def r32_r8: ORCvtFormR32Reg; +*/ - // Conversion from register to R64C: + // Conversion to register from R64C: def r32_r64: ORCvtFormR64Reg; - def r16_r64: ORCvtFormR64Reg; - def r8_r64: ORCvtFormR64Reg; + // def r16_r64: ORCvtFormR64Reg; + // def r8_r64: ORCvtFormR64Reg; - // Conversion from R64C to register + // Conversion to R64C from register def r64_r32: ORCvtFormRegR64; - def r64_r16: ORCvtFormRegR64; - def r64_r8: ORCvtFormRegR64; -*/ + // def r64_r16: ORCvtFormRegR64; + // def r64_r8: ORCvtFormRegR64; // bitconvert patterns: def r32_f32: ORCvtFormR32Reg pattern>: RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC", IntegerOp, pattern>; -class SELBVecInst: +class SELBVecInst: SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), [(set (vectype VECREG:$rT), (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)), - (and (vnot (vectype VECREG:$rC)), + (and (vnot_frag (vectype VECREG:$rC)), (vectype VECREG:$rA))))]>; class SELBVecVCondInst: @@ -1947,7 +1950,7 @@ multiclass SelectBits def v16i8: SELBVecInst; def v8i16: SELBVecInst; def v4i32: SELBVecInst; - def v2i64: SELBVecInst; + def v2i64: SELBVecInst; def r128: SELBRegInst; def r64: SELBRegInst; @@ -4321,6 +4324,13 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), (ANDfabsvec (v4f32 VECREG:$rA), (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; +def : Pat<(fabs R64FP:$rA), + (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>; + +def : Pat<(fabs (v2f64 VECREG:$rA)), + (ANDfabsvec (v2f64 VECREG:$rA), + (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; + //===----------------------------------------------------------------------===// // Hint for branch instructions: 
//===----------------------------------------------------------------------===// diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp32.ll similarity index 56% rename from test/CodeGen/CellSPU/fcmp.ll rename to test/CodeGen/CellSPU/fcmp32.ll index aad77175d16..27a659e8293 100644 --- a/test/CodeGen/CellSPU/fcmp.ll +++ b/test/CodeGen/CellSPU/fcmp32.ll @@ -1,22 +1,23 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: grep fceq %t1.s | count 1 ; RUN: grep fcmeq %t1.s | count 1 -; -; This file includes standard floating point arithmetic instructions + target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" +; Exercise the floating point comparison operators for f32: + declare double @fabs(double) declare float @fabsf(float) define i1 @fcmp_eq(float %arg1, float %arg2) { - %A = fcmp oeq float %arg1, %arg2 ; [#uses=1] + %A = fcmp oeq float %arg1, %arg2 ret i1 %A } define i1 @fcmp_mag_eq(float %arg1, float %arg2) { - %A = call float @fabsf(float %arg1) ; [#uses=1] - %B = call float @fabsf(float %arg2) ; [#uses=1] - %C = fcmp oeq float %A, %B ; [#uses=1] - ret i1 %C + %1 = call float @fabsf(float %arg1) + %2 = call float @fabsf(float %arg2) + %3 = fcmp oeq float %1, %2 + ret i1 %3 } diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll new file mode 100644 index 00000000000..1906bfe7dda --- /dev/null +++ b/test/CodeGen/CellSPU/fcmp64.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s + +define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind { +entry: + %A = fcmp oeq double %arg1, %arg2 + ret i1 %A +} diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll index 70220a563d9..b6eca10803e 100644 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -1,9 +1,10 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep fsmbi %t1.s | count 2 +; RUN: grep fsmbi %t1.s | count 3 ; RUN: grep 32768 %t1.s | count 2 ; RUN: grep xor %t1.s | count 4 -; RUN: grep and %t1.s | count 4 -; RUN: grep andbi %t1.s | count 2 +; RUN: grep and %t1.s | count 5 +; RUN: grep andbi %t1.s | count 3 + target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" @@ -33,11 +34,11 @@ declare double @fabs(double) declare float @fabsf(float) define double @fabs_dp(double %X) { - %Y = call double @fabs( double %X ) ; [#uses=1] + %Y = call double @fabs( double %X ) ret double %Y } define float @fabs_sp(float %X) { - %Y = call float @fabsf( float %X ) ; [#uses=1] + %Y = call float @fabsf( float %X ) ret float %Y } diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll index 3a7334d808c..e83e47606c2 100644 --- a/test/CodeGen/CellSPU/select_bits.ll +++ b/test/CodeGen/CellSPU/select_bits.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep selb %t1.s | count 280 +; RUN: grep selb %t1.s | count 56 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" @@ -9,7 +9,7 @@ target triple = "spu" ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> 
@selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %C = and <2 x i64> %rC, %rB %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %A, %rA @@ -18,7 +18,7 @@ define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %C = and <2 x i64> %rB, %rC %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %A, %rA @@ -27,7 +27,7 @@ define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %A, %rA %C = and <2 x i64> %rB, %rC @@ -36,7 +36,7 @@ define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %A, %rA %C = and <2 x i64> %rC, %rB @@ -45,7 +45,7 @@ define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %C = and <2 x i64> %rC, %rB %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %rA, %A @@ -54,7 +54,7 @@ define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %C = and <2 x i64> %rB, %rC %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %rA, %A @@ -63,7 +63,7 @@ define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %rA, %A %C = and <2 x i64> %rB, %rC @@ -72,7 +72,7 @@ define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { +define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > %B = and <2 x i64> %rA, %A %C = and <2 x i64> %rC, %rB @@ -85,7 +85,7 @@ define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %C = and <4 x i32> %rC, %rB %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > %B = and <4 x i32> %A, %rA @@ -94,7 +94,7 @@ define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x 
i32> %rB, <4 x i32> %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %C = and <4 x i32> %rB, %rC %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > %B = and <4 x i32> %A, %rA @@ -103,7 +103,7 @@ define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > %B = and <4 x i32> %A, %rA %C = and <4 x i32> %rB, %rC @@ -112,7 +112,7 @@ define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> %B = and <4 x i32> %A, %rA %C = and <4 x i32> %rC, %rB @@ -121,7 +121,7 @@ define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %C = and <4 x i32> %rC, %rB %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> %B = and <4 x i32> %rA, %A @@ -130,7 +130,7 @@ define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %C = and <4 x i32> %rB, %rC %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> %B = and <4 x i32> %rA, %A @@ -139,7 +139,7 @@ define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> %B = and <4 x i32> %rA, %A %C = and <4 x i32> %rB, %rC @@ -148,7 +148,7 @@ define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { +define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> %B = and <4 x i32> %rA, %A %C = and <4 x i32> %rC, %rB @@ -161,7 +161,7 @@ define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %C = and <8 x i16> %rC, %rB %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -171,7 +171,7 @@ define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define <8 x i16> @selb_v8i16_02(<8 x 
i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %C = and <8 x i16> %rB, %rC %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -181,7 +181,7 @@ define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %B = and <8 x i16> %A, %rA @@ -191,7 +191,7 @@ define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %B = and <8 x i16> %A, %rA @@ -201,7 +201,7 @@ define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %C = and <8 x i16> %rC, %rB %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -211,7 +211,7 @@ define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %C = and <8 x i16> %rB, %rC %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -221,7 +221,7 @@ define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %B = and <8 x i16> %rA, %A @@ -231,7 +231,7 @@ define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { +define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %B = and <8 x i16> %rA, %A @@ -245,7 +245,7 @@ define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %C = and <16 x i8> %rC, %rB %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -257,7 +257,7 @@ define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> 
@selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %C = and <16 x i8> %rB, %rC %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -269,7 +269,7 @@ define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -281,7 +281,7 @@ define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -293,7 +293,7 @@ define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %C = and <16 x i8> %rC, %rB %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -305,7 +305,7 @@ define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %C = and <16 x i8> %rB, %rC %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -317,7 +317,7 @@ define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -329,7 +329,7 @@ define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { +define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -345,7 +345,7 @@ define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) { %C = and i32 %rC, %rB %A = xor i32 %rC, -1 %B = and i32 %A, %rA @@ -354,7 +354,7 @@ define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) { %C = and i32 %rB, %rC %A = xor i32 %rC, -1 %B = and i32 %A, %rA @@ -363,7 +363,7 @@ define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) { +define i32 
@selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) { %A = xor i32 %rC, -1 %B = and i32 %A, %rA %C = and i32 %rB, %rC @@ -372,7 +372,7 @@ define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) { %A = xor i32 %rC, -1 %B = and i32 %A, %rA %C = and i32 %rC, %rB @@ -381,7 +381,7 @@ define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) { %C = and i32 %rC, %rB %A = xor i32 %rC, -1 %B = and i32 %rA, %A @@ -390,7 +390,7 @@ define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) { %C = and i32 %rB, %rC %A = xor i32 %rC, -1 %B = and i32 %rA, %A @@ -399,7 +399,7 @@ define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) { %A = xor i32 %rC, -1 %B = and i32 %rA, %A %C = and i32 %rB, %rC @@ -408,7 +408,7 @@ define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) { +define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) { %A = xor i32 %rC, -1 %B = and i32 %rA, %A %C = and i32 %rC, %rB @@ -421,7 +421,7 @@ define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) { ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) { %C = and i16 %rC, %rB %A = xor i16 %rC, -1 %B = and i16 %A, %rA @@ -430,7 +430,7 @@ define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) { %C = and i16 %rB, %rC %A = xor i16 %rC, -1 %B = and i16 %A, %rA @@ -439,7 +439,7 @@ define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) { %A = xor i16 %rC, -1 %B = and i16 %A, %rA %C = and i16 %rB, %rC @@ -448,7 +448,7 @@ define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) { %A = xor i16 %rC, -1 %B = and i16 %A, %rA %C = and i16 %rC, %rB @@ -457,7 +457,7 @@ define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) { %C = and i16 %rC, %rB %A = xor i16 %rC, -1 %B = and i16 %rA, %A @@ -466,7 +466,7 @@ define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) { %C = and i16 %rB, %rC %A = xor i16 %rC, -1 %B = and i16 %rA, %A @@ -475,7 +475,7 @@ define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define i16 @selb_i16_07(i16 
%rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) { %A = xor i16 %rC, -1 %B = and i16 %rA, %A %C = and i16 %rB, %rC @@ -484,7 +484,7 @@ define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) { +define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) { %A = xor i16 %rC, -1 %B = and i16 %rA, %A %C = and i16 %rC, %rB @@ -497,7 +497,7 @@ define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) { ;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ ; (or (and rC, rB), (and (not rC), rA)) -define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) { %C = and i8 %rC, %rB %A = xor i8 %rC, -1 %B = and i8 %A, %rA @@ -506,7 +506,7 @@ define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and rB, rC), (and (not rC), rA)) -define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) { %C = and i8 %rB, %rC %A = xor i8 %rC, -1 %B = and i8 %A, %rA @@ -515,7 +515,7 @@ define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and (not rC), rA), (and rB, rC)) -define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) { %A = xor i8 %rC, -1 %B = and i8 %A, %rA %C = and i8 %rB, %rC @@ -524,7 +524,7 @@ define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and (not rC), rA), (and rC, rB)) -define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) { %A = xor i8 %rC, -1 %B = and i8 %A, %rA %C = and i8 %rC, %rB @@ -533,7 +533,7 @@ define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and rC, rB), (and rA, (not rC))) -define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) { %C = and i8 %rC, %rB %A = xor i8 %rC, -1 %B = and i8 %rA, %A @@ -542,7 +542,7 @@ define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and rB, rC), (and rA, (not rC))) -define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) { %C = and i8 %rB, %rC %A = xor i8 %rC, -1 %B = and i8 %rA, %A @@ -551,7 +551,7 @@ define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and rA, (not rC)), (and rB, rC)) -define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) { %A = xor i8 %rC, -1 %B = and i8 %rA, %A %C = and i8 %rB, %rC @@ -560,7 +560,7 @@ define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) { } ; (or (and rA, (not rC)), (and rC, rB)) -define i8 @selb_i8_08(i8 %rA, i8 %rB, i8 %rC) { +define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) { %A = xor i8 %rC, -1 %B = and i8 %rA, %A %C = and i8 %rC, %rB diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 2df2f96435d..3c26baa7c7a 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -275,3 +275,9 @@ define i64 @ashr_i64_3(i64 %arg1, i32 %shift) { %2 = ashr i64 %arg1, %1 ret i64 %2 } + +define i32 @hi32_i64(i64 %arg) { + %1 = lshr i64 %arg, 32 + %2 = trunc i64 %1 to i32 + ret i32 %2 +} -- 2.34.1
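
Note for readers tracing the new i64 constant path: the short standalone C++ sketch below (illustrative only, not part of the patch) mirrors the shuffle-control-byte computation that SPU::LowerSplat_v2i64 performs above when the upper and lower 32-bit halves of the splat value differ. It assumes the usual SPU SHUFB control-byte encodings — 0x80 yields a 0x00 result byte, 0xC0 yields 0xFF, 0xE0 yields 0x80, and any other value indexes the 32-byte concatenation of the two source registers (the HI32 splat in bytes 0-15, the LO32 splat in bytes 16-31).

    // Standalone sketch of the SHUFB mask built for a v2i64 splat constant.
    // The byte values and the "special half" handling follow the loop in
    // SPU::LowerSplat_v2i64; the SHUFB control-byte meanings noted below are
    // assumptions from the SPU ISA, not something this patch defines.
    #include <cstdint>
    #include <cstdio>

    static void splatShufBytes(uint64_t splat, uint8_t mask[16]) {
      uint32_t upper = uint32_t(splat >> 32);
      uint32_t lower = uint32_t(splat);
      // "Special" halves are synthesized entirely from control bytes:
      bool upper_special =
          (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      bool lower_special =
          (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      for (int i = 0; i < 4; ++i) {     // four 32-bit lanes of the mask
        for (int j = 0; j < 4; ++j) {   // four bytes per lane
          bool process_upper = (upper_special && (i & 1) == 0);
          bool process_lower = (lower_special && (i & 1) == 1);
          uint8_t b;
          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              b = 0x80;                       // synthesize a 0x00 byte
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              b = 0xc0;                       // synthesize a 0xFF byte
            else                              // half is 0x80000000
              b = (j == 0 ? 0xe0 : 0x80);     // 0x80 followed by three 0x00s
          } else {
            // Pick the byte from the HI32 (even lanes) or LO32 (odd lanes,
            // offset 16 into the concatenated operands) splat vector:
            b = uint8_t(i * 4 + j + ((i & 1) * 16));
          }
          mask[i * 4 + j] = b;
        }
      }
    }

    int main() {
      uint8_t mask[16];
      // Example: upper half is the "special" value zero, lower half is not.
      splatShufBytes(0x00000000deadbeefULL, mask);
      for (int k = 0; k < 16; ++k)
        printf("%02x%c", mask[k], k == 15 ? '\n' : ' ');
      return 0;
    }

For the example value above this prints "80 80 80 80 14 15 16 17 80 80 80 80 1c 1d 1e 1f": the zero upper words are produced purely by control bytes, while the 0xdeadbeef lower words are copied from the LO32 splat operand, which is why the code only emits a build_vector for the non-special half.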