X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FSystemZ%2FSystemZISelLowering.cpp;h=09d03f7a891a0954ac2035073e110e2ae0020aaf;hp=ff79a48179f725affe90fe1da97f8278c9b1620f;hb=f29cc18dcb043a173f211f8f3e5caabc6f2e7b08;hpb=cf0fa9b9dd296771a2de766c5262b96938cf13a3 diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index ff79a48179f..09d03f7a891 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,10 +81,11 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { return Op; } -SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) - : TargetLowering(tm), Subtarget(STI) { - MVT PtrVT = getPointerTy(); + : TargetLowering(TM), Subtarget(STI) { + auto &DL = *TM.getDataLayout(); + MVT PtrVT = getPointerTy(DL); // Set up the register classes. if (Subtarget.hasHighWord()) @@ -318,6 +319,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // Convert a GPR scalar to a vector by inserting it into element 0. setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + // Use a series of unpacks for extensions. + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); + // Detect shifts by a scalar amount and convert them into // V*_BY_SCALAR. setOperationAction(ISD::SHL, VT, Custom); @@ -436,6 +441,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; @@ -450,7 +456,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, MaxStoresPerMemsetOptSize = 0; } -EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -501,9 +508,10 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, *Fast = true; return true; } - + bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. if (AM.BaseGV) @@ -538,7 +546,7 @@ bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { //===----------------------------------------------------------------------===// TargetLowering::ConstraintType -SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { +SystemZTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'a': // Address register @@ -635,13 +643,14 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, // has already been verified. MC is the class associated with "t" and // Map maps 0-based register numbers to LLVM register numbers. static std::pair -parseRegisterNumber(const std::string &Constraint, - const TargetRegisterClass *RC, const unsigned *Map) { +parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, + const unsigned *Map) { assert(*(Constraint.end()-1) == '}' && "Missing '}'"); if (isdigit(Constraint[2])) { - std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); - unsigned Index = atoi(Suffix.c_str()); - if (Index < 16 && Map[Index]) + unsigned Index; + bool Failed = + Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); + if (!Failed && Index < 16 && Map[Index]) return std::make_pair(Map[Index], RC); } return std::make_pair(0U, nullptr); @@ -649,8 +658,7 @@ parseRegisterNumber(const std::string &Constraint, std::pair SystemZTargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -681,7 +689,7 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &SystemZ::FP32BitRegClass); } } - if (Constraint[0] == '{') { + if (Constraint.size() > 0 && Constraint[0] == '{') { // We need to override the default register parsing for GPRs and FPRs // because the interpretation depends on VT. The internal names of // the registers are also different from the external names @@ -773,6 +781,24 @@ bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { return true; } +// We do not yet support 128-bit single-element vector types. If the user +// attempts to use such types as function argument or return type, prefer +// to error out instead of emitting code violating the ABI. +static void VerifyVectorType(MVT VT, EVT ArgVT) { + if (ArgVT.isVector() && !VT.isVector()) + report_fatal_error("Unsupported vector argument or return type"); +} + +static void VerifyVectorTypes(const SmallVectorImpl &Ins) { + for (unsigned i = 0; i < Ins.size(); ++i) + VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); +} + +static void VerifyVectorTypes(const SmallVectorImpl &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) + VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); +} + // Value is a value that has been passed to us in the location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. @@ -793,7 +819,15 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, else if (VA.getLocInfo() == CCValAssign::Indirect) Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, MachinePointerInfo(), false, false, false, 0); - else + else if (VA.getLocInfo() == CCValAssign::BCvt) { + // If this is a short vector argument loaded from the stack, + // extend from i64 to full vector size and then bitcast. + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64, + Value, DAG.getUNDEF(MVT::i64)); + Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); + } else assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); return Value; } @@ -810,6 +844,14 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::AExt: return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::BCvt: + // If this is a short vector argument to be stored to the stack, + // bitcast to v2i64 and then extract first element. + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, + DAG.getConstant(0, DL, MVT::i32)); case CCValAssign::Full: return Value; default: @@ -830,6 +872,10 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, auto *TFL = static_cast(Subtarget.getFrameLowering()); + // Detect unsupported vector argument types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Ins); + // Assign locations to all of the incoming arguments. SmallVector ArgLocs; SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); @@ -887,7 +933,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, // Create the SelectionDAG nodes corresponding to a load // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, @@ -925,7 +971,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); @@ -975,7 +1021,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); + + // Detect unsupported vector argument and return types. + if (Subtarget.hasVector()) { + VerifyVectorTypes(Outs); + VerifyVectorTypes(Ins); + } // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -1131,6 +1183,10 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + // Detect unsupported vector return types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Outs); + // Assign locations to each returned value. SmallVector RetLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); @@ -1201,6 +1257,143 @@ static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, } } +// Return true if Op is an intrinsic node without chain that returns the +// CC value as its final argument. Provide the associated SystemZISD +// opcode and the mask of valid CC values if so. +static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { + unsigned Id = cast(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpkshs: + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + Opcode = SystemZISD::PACKS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vpklshs: + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + Opcode = SystemZISD::PACKLS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vceqbs: + case Intrinsic::s390_vceqhs: + case Intrinsic::s390_vceqfs: + case Intrinsic::s390_vceqgs: + Opcode = SystemZISD::VICMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchbs: + case Intrinsic::s390_vchhs: + case Intrinsic::s390_vchfs: + case Intrinsic::s390_vchgs: + Opcode = SystemZISD::VICMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchlbs: + case Intrinsic::s390_vchlhs: + case Intrinsic::s390_vchlfs: + case Intrinsic::s390_vchlgs: + Opcode = SystemZISD::VICMPHLS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vtm: + Opcode = SystemZISD::VTM; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfaebs: + case Intrinsic::s390_vfaehs: + case Intrinsic::s390_vfaefs: + Opcode = SystemZISD::VFAE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfaezbs: + case Intrinsic::s390_vfaezhs: + case Intrinsic::s390_vfaezfs: + Opcode = SystemZISD::VFAEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeebs: + case Intrinsic::s390_vfeehs: + case Intrinsic::s390_vfeefs: + Opcode = SystemZISD::VFEE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeezbs: + case Intrinsic::s390_vfeezhs: + case Intrinsic::s390_vfeezfs: + Opcode = SystemZISD::VFEEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenebs: + case Intrinsic::s390_vfenehs: + case Intrinsic::s390_vfenefs: + Opcode = SystemZISD::VFENE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenezbs: + case Intrinsic::s390_vfenezhs: + case Intrinsic::s390_vfenezfs: + Opcode = SystemZISD::VFENEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vistrbs: + case Intrinsic::s390_vistrhs: + case Intrinsic::s390_vistrfs: + Opcode = SystemZISD::VISTR_CC; + CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; + return true; + + case Intrinsic::s390_vstrcbs: + case Intrinsic::s390_vstrchs: + case Intrinsic::s390_vstrcfs: + Opcode = SystemZISD::VSTRC_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrczbs: + case Intrinsic::s390_vstrczhs: + case Intrinsic::s390_vstrczfs: + Opcode = SystemZISD::VSTRCZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfcedbs: + Opcode = SystemZISD::VFCMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchdbs: + Opcode = SystemZISD::VFCMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchedbs: + Opcode = SystemZISD::VFCMPHES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vftcidb: + Opcode = SystemZISD::VFTCI; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + default: + return false; + } +} + // Emit an intrinsic with chain with a glued value instead of its CC result. static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, unsigned Opcode) { @@ -1221,6 +1414,23 @@ static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, return Intr; } +// Emit an intrinsic with a glued value instead of its CC result. +static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector Ops; + Ops.reserve(NumOps - 1); + for (unsigned I = 1; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + if (Op->getNumValues() == 1) + return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); + assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); + SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); + return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1797,17 +2007,17 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, else if (Cond == ISD::SETLT || Cond == ISD::SETULT) // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, // always true for CC>3. - C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) // ...and the inverse of that. - C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; else if (Cond == ISD::SETLE || Cond == ISD::SETULE) // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), // always true for CC>3. - C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) // ...and the inverse of that. - C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; else llvm_unreachable("Unexpected integer comparison type"); C.CCMask &= CCValid; @@ -1824,6 +2034,10 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && + isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); @@ -1872,6 +2086,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { case ISD::INTRINSIC_W_CHAIN: Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); break; + case ISD::INTRINSIC_WO_CHAIN: + Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); + break; default: llvm_unreachable("Invalid comparison operands"); } @@ -1957,14 +2174,14 @@ static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { case ISD::SETOGE: case ISD::SETGE: - return IsFP ? SystemZISD::VFCMPHE : 0; + return IsFP ? SystemZISD::VFCMPHE : static_cast(0); case ISD::SETOGT: case ISD::SETGT: return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; case ISD::SETUGT: - return IsFP ? 0 : SystemZISD::VICMPHL; + return IsFP ? static_cast(0) : SystemZISD::VICMPHL; default: return 0; @@ -2186,7 +2403,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); Reloc::Model RM = DAG.getTarget().getRelocationModel(); CodeModel::Model CM = DAG.getTarget().getCodeModel(); @@ -2225,7 +2442,7 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, unsigned Opcode, SDValue GOTOffset) const { SDLoc DL(Node); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDValue Glue; @@ -2271,7 +2488,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); // The high part of the thread pointer is in access register 0. @@ -2372,7 +2589,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDLoc DL(Node); const BlockAddress *BA = Node->getBlockAddress(); int64_t Offset = Node->getOffset(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -2382,7 +2599,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const { SDLoc DL(JT); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); // Use LARL to load the address of the table. @@ -2392,7 +2609,7 @@ SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { SDLoc DL(CP); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (CP->isMachineConstantPoolEntry()) @@ -2456,7 +2673,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); @@ -3006,6 +3223,67 @@ SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return SDValue(); } +SDValue +SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCC(Op, Opcode, CCValid)) { + SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + if (Op->getNumValues() == 1) + return CC; + assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), + Glued, CC); + } + + unsigned Id = cast(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpdi: + return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vperm: + return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vuphb: + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplhb: + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplb: + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vupllb: + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vsumb: + case Intrinsic::s390_vsumh: + case Intrinsic::s390_vsumgh: + case Intrinsic::s390_vsumgf: + case Intrinsic::s390_vsumqf: + case Intrinsic::s390_vsumqg: + return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + + return SDValue(); +} + namespace { // Says that SystemZISD operation Opcode can be used to perform the equivalent // of a VPERM with permute vector Bytes. If Opcode takes three operands, @@ -3521,7 +3799,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { for (unsigned J = 0; J < BytesPerElement; ++J) { uint64_t Byte = (Value >> (J * 8)) & 0xff; if (Byte == 0xff) - Mask |= 1 << ((E - I - 1) * BytesPerElement + J); + Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); else if (Byte != 0) return false; } @@ -3910,6 +4188,23 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::BITCAST, DL, VT, Res); } +SDValue +SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, + unsigned UnpackHigh) const { + SDValue PackedOp = Op.getOperand(0); + EVT OutVT = Op.getValueType(); + EVT InVT = PackedOp.getValueType(); + unsigned ToBits = OutVT.getVectorElementType().getSizeInBits(); + unsigned FromBits = InVT.getVectorElementType().getSizeInBits(); + do { + FromBits *= 2; + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), + SystemZ::VectorBits / FromBits); + PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); + } while (FromBits != ToBits); + return PackedOp; +} + SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const { // Look for cases where a vector shift can use the *_BY_SCALAR form. @@ -4048,6 +4343,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerPREFETCH(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: @@ -4058,6 +4355,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); case ISD::SHL: return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); case ISD::SRL: @@ -4071,7 +4372,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME - switch (Opcode) { + switch ((SystemZISD::NodeType)Opcode) { + case SystemZISD::FIRST_NUMBER: break; OPCODE(RET_FLAG); OPCODE(CALL); OPCODE(SIBCALL); @@ -4122,6 +4424,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(PERMUTE_DWORDS); OPCODE(PERMUTE); OPCODE(PACK); + OPCODE(PACKS_CC); + OPCODE(PACKLS_CC); + OPCODE(UNPACK_HIGH); + OPCODE(UNPACKL_HIGH); + OPCODE(UNPACK_LOW); + OPCODE(UNPACKL_LOW); OPCODE(VSHL_BY_SCALAR); OPCODE(VSRL_BY_SCALAR); OPCODE(VSRA_BY_SCALAR); @@ -4129,11 +4437,28 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VICMPE); OPCODE(VICMPH); OPCODE(VICMPHL); + OPCODE(VICMPES); + OPCODE(VICMPHS); + OPCODE(VICMPHLS); OPCODE(VFCMPE); OPCODE(VFCMPH); OPCODE(VFCMPHE); + OPCODE(VFCMPES); + OPCODE(VFCMPHS); + OPCODE(VFCMPHES); + OPCODE(VFTCI); OPCODE(VEXTEND); OPCODE(VROUND); + OPCODE(VTM); + OPCODE(VFAE_CC); + OPCODE(VFAEZ_CC); + OPCODE(VFEE_CC); + OPCODE(VFEEZ_CC); + OPCODE(VFENE_CC); + OPCODE(VFENEZ_CC); + OPCODE(VISTR_CC); + OPCODE(VSTRC_CC); + OPCODE(VSTRCZ_CC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -4334,17 +4659,35 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, } } } - // (z_merge_high 0, 0) -> 0. This is mostly useful for using VLLEZF - // for v4f32. - if (Opcode == SystemZISD::MERGE_HIGH) { + if (Opcode == SystemZISD::MERGE_HIGH || + Opcode == SystemZISD::MERGE_LOW) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); - if (Op0 == Op1) { - if (Op0.getOpcode() == ISD::BITCAST) - Op0 = Op0.getOperand(0); - if (Op0.getOpcode() == SystemZISD::BYTE_MASK && - cast(Op0.getOperand(0))->getZExtValue() == 0) + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (Op0.getOpcode() == SystemZISD::BYTE_MASK && + cast(Op0.getOperand(0))->getZExtValue() == 0) { + // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF + // for v4f32. + if (Op1 == N->getOperand(0)) return Op1; + // (z_merge_? 0, X) -> (z_unpackl_? 0, X). + EVT VT = Op1.getValueType(); + unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); + if (ElemBytes <= 4) { + Opcode = (Opcode == SystemZISD::MERGE_HIGH ? + SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); + EVT InVT = VT.changeVectorElementTypeToInteger(); + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), + SystemZ::VectorBytes / ElemBytes / 2); + if (VT != InVT) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); + DCI.AddToWorklist(Op.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + } } } // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better