return Op;
}
-SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
- : TargetLowering(tm), Subtarget(STI) {
- MVT PtrVT = getPointerTy();
+ : TargetLowering(TM), Subtarget(STI) {
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the register classes.
if (Subtarget.hasHighWord())
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
- addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
if (Subtarget.hasVector()) {
addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
}
computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
- setExceptionPointerRegister(SystemZ::R6D);
- setExceptionSelectorRegister(SystemZ::R7D);
setStackPointerRegisterToSaveRestore(SystemZ::R15D);
// TODO: It may be better to default to latency-oriented scheduling, however
if (isTypeLegal(VT)) {
// These operations are legal for anything that can be stored in a
// vector register, even if there is no native support for the format
- // as such.
+ // as such. In particular, we can do these for v4f32 even though there
+ // are no specific instructions for that format.
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
// Convert a GPR scalar to a vector by inserting it into element 0.
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ // Use a series of unpacks for extensions.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
// Detect shifts by a scalar amount and convert them into
// V*_BY_SCALAR.
setOperationAction(ISD::SHL, VT, Custom);
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
}
}
// Handle floating-point vector types.
if (Subtarget.hasVector()) {
// Scalar-to-vector conversion is just a subreg.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
// Some insertions and extractions can be done directly but others
// need to go via integers.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
// These operations have direct equivalents.
// We have 64-bit FPR<->GPR moves, but need special handling for
// 32-bit forms.
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ if (!Subtarget.hasVector()) {
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ }
// VASTART and VACOPY need to deal with the SystemZ-specific varargs
// structure, but VAEND is a no-op.
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::FP_ROUND);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemsetOptSize = 0;
}
-EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
+ LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
*Fast = true;
return true;
}
-
-bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+
+bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
// Punt on globals for now, although they can be used in limited
// RELATIVE LONG cases.
if (AM.BaseGV)
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
-SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'a': // Address register
// has already been verified. MC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
-parseRegisterNumber(const std::string &Constraint,
- const TargetRegisterClass *RC, const unsigned *Map) {
+parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
+ const unsigned *Map) {
assert(*(Constraint.end()-1) == '}' && "Missing '}'");
if (isdigit(Constraint[2])) {
- std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
- unsigned Index = atoi(Suffix.c_str());
- if (Index < 16 && Map[Index])
+ unsigned Index;
+ bool Failed =
+ Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
+ if (!Failed && Index < 16 && Map[Index])
return std::make_pair(Map[Index], RC);
}
return std::make_pair(0U, nullptr);
std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, const std::string &Constraint,
- MVT VT) const {
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
}
}
- if (Constraint[0] == '{') {
+ if (Constraint.size() > 0 && Constraint[0] == '{') {
// We need to override the default register parsing for GPRs and FPRs
// because the interpretation depends on VT. The internal names of
// the registers are also different from the external names
}
bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall())
- return false;
- return true;
+ return CI->isTailCall(); // Defer entirely to the call's own tail-call flag.
+}
+
+// 128-bit single-element vector types are not supported yet.  If such a
+// type is used as a function argument or return type, stop with a fatal
+// error instead of emitting code that violates the ABI.  The check fires
+// when ArgVT is a vector type but VT is not.
+static void VerifyVectorType(MVT VT, EVT ArgVT) {
+  const bool LostVectorShape = ArgVT.isVector() && !VT.isVector();
+  if (!LostVectorShape)
+    return;
+  report_fatal_error("Unsupported vector argument or return type");
+}
+
+// Run VerifyVectorType over every incoming argument / call result in Ins.
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
+  for (const ISD::InputArg &In : Ins)
+    VerifyVectorType(In.VT, In.ArgVT);
+}
+
+// Run VerifyVectorType over every outgoing argument / return value in Outs.
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  for (const ISD::OutputArg &Out : Outs)
+    VerifyVectorType(Out.VT, Out.ArgVT);
}
// Value is a value that has been passed to us in the location described by VA
else if (VA.getLocInfo() == CCValAssign::Indirect)
Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
MachinePointerInfo(), false, false, false, 0);
- else
+ else if (VA.getLocInfo() == CCValAssign::BCvt) {
+ // If this is a short vector argument loaded from the stack,
+ // extend from i64 to full vector size and then bitcast.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
+ Value, DAG.getUNDEF(MVT::i64));
+ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
+ } else
assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
return Value;
}
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::BCvt:
+ // If this is a short vector argument to be stored to the stack,
+ // bitcast to v2i64 and then extract first element.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
case CCValAssign::Full:
return Value;
default:
auto *TFL =
static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ // Detect unsupported vector argument types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Ins);
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
+ case MVT::v4f32:
case MVT::v2f64:
RC = &SystemZ::VR128BitRegClass;
break;
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
// passed as right-justified 8-byte values.
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false,
+ false, false, 0);
}
// Convert the value of the argument register into the value that's
for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
&SystemZ::FP64BitRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ MachinePointerInfo::getFixedStack(MF, FI),
false, false, 0);
-
}
// Join the stores, which are independent of one another.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(MF.getDataLayout());
+
+ // Detect unsupported vector argument and return types.
+ if (Subtarget.hasVector()) {
+ VerifyVectorTypes(Outs);
+ VerifyVectorTypes(Ins);
+ }
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
ArgValue = SpillSlot;
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
return Chain;
}
+// Return true if the values described by Outs can be returned according to
+// RetCC_SystemZ.  When the subtarget has the vector facility, unsupported
+// vector return types are rejected with a fatal error first.
+bool SystemZTargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+  if (Subtarget.hasVector())
+    VerifyVectorTypes(Outs);
+
+  // Ask the return calling convention whether every value can be assigned.
+  SmallVector<CCValAssign, 16> Locations;
+  CCState State(CallConv, isVarArg, MF, Locations, Context);
+  return State.CheckReturn(Outs, RetCC_SystemZ);
+}
+
SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
SDLoc DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ // Detect unsupported vector return types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Outs);
+
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
}
}
+// Return true if Op is an intrinsic node without chain that returns the
+// CC value as its final result.  On success, Opcode receives the associated
+// SystemZISD opcode and CCValid the mask of valid CC values; both are left
+// untouched when false is returned.
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
+  // Record a match in the out-parameters and report success.
+  auto Match = [&](unsigned NewOpcode, unsigned NewCCValid) {
+    Opcode = NewOpcode;
+    CCValid = NewCCValid;
+    return true;
+  };
+
+  // Operand 0 holds the intrinsic ID.
+  const unsigned IntrinsicID =
+      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  switch (IntrinsicID) {
+  // Saturating packs.
+  case Intrinsic::s390_vpkshs:
+  case Intrinsic::s390_vpksfs:
+  case Intrinsic::s390_vpksgs:
+    return Match(SystemZISD::PACKS_CC, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vpklshs:
+  case Intrinsic::s390_vpklsfs:
+  case Intrinsic::s390_vpklsgs:
+    return Match(SystemZISD::PACKLS_CC, SystemZ::CCMASK_VCMP);
+
+  // Integer comparisons.
+  case Intrinsic::s390_vceqbs:
+  case Intrinsic::s390_vceqhs:
+  case Intrinsic::s390_vceqfs:
+  case Intrinsic::s390_vceqgs:
+    return Match(SystemZISD::VICMPES, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vchbs:
+  case Intrinsic::s390_vchhs:
+  case Intrinsic::s390_vchfs:
+  case Intrinsic::s390_vchgs:
+    return Match(SystemZISD::VICMPHS, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vchlbs:
+  case Intrinsic::s390_vchlhs:
+  case Intrinsic::s390_vchlfs:
+  case Intrinsic::s390_vchlgs:
+    return Match(SystemZISD::VICMPHLS, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vtm:
+    return Match(SystemZISD::VTM, SystemZ::CCMASK_VCMP);
+
+  // String operations.
+  case Intrinsic::s390_vfaebs:
+  case Intrinsic::s390_vfaehs:
+  case Intrinsic::s390_vfaefs:
+    return Match(SystemZISD::VFAE_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vfaezbs:
+  case Intrinsic::s390_vfaezhs:
+  case Intrinsic::s390_vfaezfs:
+    return Match(SystemZISD::VFAEZ_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vfeebs:
+  case Intrinsic::s390_vfeehs:
+  case Intrinsic::s390_vfeefs:
+    return Match(SystemZISD::VFEE_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vfeezbs:
+  case Intrinsic::s390_vfeezhs:
+  case Intrinsic::s390_vfeezfs:
+    return Match(SystemZISD::VFEEZ_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vfenebs:
+  case Intrinsic::s390_vfenehs:
+  case Intrinsic::s390_vfenefs:
+    return Match(SystemZISD::VFENE_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vfenezbs:
+  case Intrinsic::s390_vfenezhs:
+  case Intrinsic::s390_vfenezfs:
+    return Match(SystemZISD::VFENEZ_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vistrbs:
+  case Intrinsic::s390_vistrhs:
+  case Intrinsic::s390_vistrfs:
+    return Match(SystemZISD::VISTR_CC,
+                 SystemZ::CCMASK_0 | SystemZ::CCMASK_3);
+
+  case Intrinsic::s390_vstrcbs:
+  case Intrinsic::s390_vstrchs:
+  case Intrinsic::s390_vstrcfs:
+    return Match(SystemZISD::VSTRC_CC, SystemZ::CCMASK_ANY);
+
+  case Intrinsic::s390_vstrczbs:
+  case Intrinsic::s390_vstrczhs:
+  case Intrinsic::s390_vstrczfs:
+    return Match(SystemZISD::VSTRCZ_CC, SystemZ::CCMASK_ANY);
+
+  // Floating-point comparisons and tests.
+  case Intrinsic::s390_vfcedbs:
+    return Match(SystemZISD::VFCMPES, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vfchdbs:
+    return Match(SystemZISD::VFCMPHS, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vfchedbs:
+    return Match(SystemZISD::VFCMPHES, SystemZ::CCMASK_VCMP);
+
+  case Intrinsic::s390_vftcidb:
+    return Match(SystemZISD::VFTCI, SystemZ::CCMASK_VCMP);
+
+  default:
+    return false;
+  }
+}
+
// Emit an intrinsic with chain with a glued value instead of its CC result.
static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
unsigned Opcode) {
return Intr;
}
+// Emit an intrinsic with a glued value instead of its CC result.  The new
+// node uses opcode Opcode and every operand of Op except the intrinsic ID.
+// Its results are Op's non-CC result, if there is one, followed by the glue.
+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
+                                     unsigned Opcode) {
+  // Operand 0 is the intrinsic ID; forward everything after it.
+  const unsigned OperandCount = Op.getNumOperands();
+  SmallVector<SDValue, 6> Forwarded;
+  Forwarded.reserve(OperandCount - 1);
+  for (unsigned Idx = 1; Idx < OperandCount; ++Idx)
+    Forwarded.push_back(Op.getOperand(Idx));
+
+  SDLoc DL(Op);
+  const unsigned ResultCount = Op->getNumValues();
+  if (ResultCount != 1) {
+    // Keep the one non-CC result and replace the CC result with glue.
+    assert(ResultCount == 2 && "Expected exactly one non-CC result");
+    SDVTList ResultVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
+    return DAG.getNode(Opcode, DL, ResultVTs, Forwarded);
+  }
+  // CC was the only result, so the new node produces just the glue.
+  return DAG.getNode(Opcode, DL, MVT::Glue, Forwarded);
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
} else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
if (Value > Mask)
return;
- assert(C.ICmpType == SystemZICMP::Any &&
- "Signedness shouldn't matter here.");
+ // If the constant is in range, we can use any comparison.
+ C.ICmpType = SystemZICMP::Any;
} else
return;
if (CCMask == SystemZ::CCMASK_CMP_NE)
return SystemZ::CCMASK_TM_SOME_1;
}
- if (EffectivelyUnsigned && CmpVal <= Low) {
+ if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
if (CCMask == SystemZ::CCMASK_CMP_LT)
return SystemZ::CCMASK_TM_ALL_0;
if (CCMask == SystemZ::CCMASK_CMP_GE)
else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
// bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
// always true for CC>3.
- C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
// ...and the inverse of that.
- C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
// bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
// always true for CC>3.
- C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
// ...and the inverse of that.
- C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
else
llvm_unreachable("Unexpected integer comparison type");
C.CCMask &= CCValid;
CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
+ isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
}
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
case ISD::INTRINSIC_W_CHAIN:
Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
+ break;
default:
llvm_unreachable("Invalid comparison operands");
}
case ISD::SETOGE:
case ISD::SETGE:
- return IsFP ? SystemZISD::VFCMPHE : 0;
+ return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
case ISD::SETOGT:
case ISD::SETGT:
return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
case ISD::SETUGT:
- return IsFP ? 0 : SystemZISD::VICMPHL;
+ return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
default:
return 0;
return 0;
}
+// Extend elements Start and Start+1 of the v4f32 value Op and return them
+// as a v2f64.  The two elements are first shuffled into lanes 0 and 2 (the
+// other lanes are undefined) and the result is passed to VEXTEND.
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
+                                  SDValue Op) {
+  int LaneMask[] = { Start, -1, Start + 1, -1 };
+  SDValue Undef = DAG.getUNDEF(MVT::v4f32);
+  SDValue Spread = DAG.getVectorShuffle(MVT::v4f32, DL, Op, Undef, LaneMask);
+  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Spread);
+}
+
+// Compare vectors CmpOp0 and CmpOp1 with opcode Opcode and return a result
+// of type VT.
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
+                            EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
+  // Everything other than v4f32 is compared directly.
+  if (CmpOp0.getValueType() != MVT::v4f32)
+    return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
+
+  // There is no hardware support for v4f32, so extend each operand into
+  // two v2f64 halves, compare the halves, and pack the two results.
+  SDValue Hi0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
+  SDValue Lo0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
+  SDValue Hi1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
+  SDValue Lo1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+  SDValue HiCmp = DAG.getNode(Opcode, DL, MVT::v2i64, Hi0, Hi1);
+  SDValue LoCmp = DAG.getNode(Opcode, DL, MVT::v2i64, Lo0, Lo1);
+  return DAG.getNode(SystemZISD::PACK, DL, VT, HiCmp, LoCmp);
+}
+
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
Invert = true;
case ISD::SETO: {
assert(IsFP && "Unexpected integer comparison");
- SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
- SDValue GE = DAG.getNode(SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
break;
}
Invert = true;
case ISD::SETONE: {
assert(IsFP && "Unexpected integer comparison");
- SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
- SDValue GT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
break;
}
// there are no cases where both work.
default:
if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
- Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
else {
CC = ISD::getSetCCSwappedOperands(CC);
if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
- Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0);
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
else
llvm_unreachable("Unhandled comparison");
}
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
int64_t Offset = Node->getOffset();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
Reloc::Model RM = DAG.getTarget().getRelocationModel();
CodeModel::Model CM = DAG.getTarget().getCodeModel();
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// If there was a non-zero offset that we didn't fold, create an explicit
unsigned Opcode,
SDValue GOTOffset) const {
SDLoc DL(Node);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDValue Glue;
}
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(Node, DAG);
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
// The high part of the thread pointer is in access register 0.
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
// Call __tls_get_offset to retrieve the offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
// Call __tls_get_offset to retrieve the module base offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
- DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- DTPOffset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ DTPOffset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), DTPOffset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
break;
Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
SystemZII::MO_INDNTPOFF);
Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getGOT(),
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
break;
}
SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
break;
}
}
SDLoc DL(Node);
const BlockAddress *BA = Node->getBlockAddress();
int64_t Offset = Node->getOffset();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
SelectionDAG &DAG) const {
SDLoc DL(JT);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
// Use LARL to load the address of the table.
SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SelectionDAG &DAG) const {
SDLoc DL(CP);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (CP->isMachineConstantPoolEntry())
Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ CP->getAlignment());
else
Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ CP->getAlignment(), CP->getOffset());
// Use LARL to load the address of the constant pool entry.
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
MachineFunction &MF = DAG.getMachineFunction();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
+// Lower DYNAMIC_STACKALLOC.  Operand 0 is the chain, operand 1 the size and
+// operand 2 the requested alignment.  The stack pointer is decremented by
+// the size (plus padding when the request exceeds the stack alignment) and
+// the returned address is rounded to the required alignment.  The result is
+// the merged pair { address, chain }.
SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ bool RealignOpt = !DAG.getMachineFunction().getFunction()->
+ hasFnAttribute("no-realign-stack");
+
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
SDLoc DL(Op);
+ // If the user has set the no alignment function attribute, ignore
+ // alloca alignments.  The alignment operand is dereferenced
+ // unconditionally, so use cast<> (which asserts on a mismatch) rather
+ // than an unchecked dyn_cast<>.
+ uint64_t AlignVal = (RealignOpt ?
+ cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+
+ uint64_t StackAlign = TFI->getStackAlignment();
+ uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
+ uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
+
unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ SDValue NeededSpace = Size;
// Get a reference to the stack pointer.
SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+ // Add extra space for alignment if needed.
+ if (ExtraAlignSpace)
+ NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
+ SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
// Copy the new stack pointer back.
Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
+ // Dynamically realign if needed.
+ if (RequiredAlign > StackAlign) {
+ Result =
+ DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+ Result =
+ DAG.getNode(ISD::AND, DL, MVT::i64, Result,
+ DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
+ }
+
SDValue Ops[2] = { Result, Chain };
return DAG.getMergeValues(Ops, DL);
}
} else if (DAG.ComputeNumSignBits(Op1) > 32) {
Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
Opcode = SystemZISD::SDIVREM32;
- } else
+ } else
Opcode = SystemZISD::SDIVREM64;
// DSG(F) takes a 64-bit dividend, so the even register in the GR128
return SDValue();
}
+// Custom-lower an INTRINSIC_WO_CHAIN node.  Intrinsics that also produce a
+// CC value are re-emitted as a glued SystemZISD node plus a CC read; a few
+// other intrinsics map directly onto SystemZISD nodes.  An empty SDValue is
+// returned for any intrinsic not handled here.
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  unsigned Opcode, CCValid;
+  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
+    SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
+    SDValue CC = getCCResult(DAG, Glued.getNode());
+    const unsigned ResultCount = Op->getNumValues();
+    if (ResultCount == 1)
+      return CC;
+    // Otherwise hand back the non-CC result together with the CC value.
+    assert(ResultCount == 2 && "Expected a CC and non-CC result");
+    return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Glued, CC);
+  }
+
+  // Operand 0 holds the intrinsic ID; the remaining operands are forwarded.
+  EVT ResultVT = Op.getValueType();
+  const unsigned IntrinsicID =
+      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  switch (IntrinsicID) {
+  case Intrinsic::s390_vpdi:
+    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, ResultVT,
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+  case Intrinsic::s390_vperm:
+    return DAG.getNode(SystemZISD::PERMUTE, DL, ResultVT,
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+  case Intrinsic::s390_vuphb:
+  case Intrinsic::s390_vuphh:
+  case Intrinsic::s390_vuphf:
+    return DAG.getNode(SystemZISD::UNPACK_HIGH, DL, ResultVT,
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vuplhb:
+  case Intrinsic::s390_vuplhh:
+  case Intrinsic::s390_vuplhf:
+    return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, ResultVT,
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vuplb:
+  case Intrinsic::s390_vuplhw:
+  case Intrinsic::s390_vuplf:
+    return DAG.getNode(SystemZISD::UNPACK_LOW, DL, ResultVT,
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vupllb:
+  case Intrinsic::s390_vupllh:
+  case Intrinsic::s390_vupllf:
+    return DAG.getNode(SystemZISD::UNPACKL_LOW, DL, ResultVT,
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vsumb:
+  case Intrinsic::s390_vsumh:
+  case Intrinsic::s390_vsumgh:
+  case Intrinsic::s390_vsumgf:
+  case Intrinsic::s390_vsumqf:
+  case Intrinsic::s390_vsumqg:
+    return DAG.getNode(SystemZISD::VSUM, DL, ResultVT,
+                       Op.getOperand(1), Op.getOperand(2));
+
+  default:
+    return SDValue();
+  }
+}
+
namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes. If Opcode takes three operands,
for (unsigned J = 0; J < BytesPerElement; ++J) {
uint64_t Byte = (Value >> (J * 8)) & 0xff;
if (Byte == 0xff)
- Mask |= 1 << ((E - I - 1) * BytesPerElement + J);
+ Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
else if (Byte != 0)
return false;
}
GS.addUndef();
} else {
GS.add(SDValue(), ResidueOps.size());
- ResidueOps.push_back(Op);
+ ResidueOps.push_back(BVN->getOperand(I));
}
}
// Create the BUILD_VECTOR for the remaining elements, if any.
if (!ResidueOps.empty()) {
while (ResidueOps.size() < NumElements)
- ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
+ ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
for (auto &Op : GS.Ops) {
if (!Op.getNode()) {
Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
if (VT == MVT::v2f64)
return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+ // Build v4f32 values directly from the FPRs:
+ //
+ // <Axxx> <Bxxx> <Cxxx> <Dxxx>
+ // V V VMRHF
+ // <ABxx> <CDxx>
+ // V VMRHG
+ // <ABCD>
+ if (VT == MVT::v4f32) {
+ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
+ // Avoid unnecessary undefs by reusing the other operand.
+ if (Op01.getOpcode() == ISD::UNDEF)
+ Op01 = Op23;
+ else if (Op23.getOpcode() == ISD::UNDEF)
+ Op23 = Op01;
+ // Merging identical replications is a no-op.
+ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
+ return Op01;
+ Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
+ Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
+ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
+ DL, MVT::v2i64, Op01, Op23);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
// Collect the constant terms.
SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
SDValue Op2 = Op.getOperand(2);
EVT VT = Op.getValueType();
- // Insertions into constant indices can be done using VPDI. However,
- // if the inserted value is a bitcast or a constant then it's better
- // to use GPRs, as below.
- if (Op1.getOpcode() != ISD::BITCAST &&
+ // Insertions into constant indices of a v2f64 can be done using VPDI.
+ // However, if the inserted value is a bitcast or a constant then it's
+ // better to use GPRs, as below.
+ if (VT == MVT::v2f64 &&
+ Op1.getOpcode() != ISD::BITCAST &&
Op1.getOpcode() != ISD::ConstantFP &&
Op2.getOpcode() == ISD::Constant) {
uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
+// Lower an extend-in-register node Op by applying the UnpackHigh opcode
+// repeatedly.  Each step doubles the integer element width of the value,
+// and the loop stops once that width equals the element width of Op's
+// result type.  At least one step is always performed.
+SDValue
+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+                                              unsigned UnpackHigh) const {
+  SDValue Value = Op.getOperand(0);
+  unsigned TargetBits =
+      Op.getValueType().getVectorElementType().getSizeInBits();
+  unsigned CurBits =
+      Value.getValueType().getVectorElementType().getSizeInBits();
+  for (;;) {
+    CurBits *= 2;
+    // Integer vector type filling a full vector register at this width.
+    EVT StepVT = MVT::getVectorVT(MVT::getIntegerVT(CurBits),
+                                  SystemZ::VectorBits / CurBits);
+    Value = DAG.getNode(UnpackHigh, SDLoc(Value), StepVT, Value);
+    if (CurBits == TargetBits)
+      break;
+  }
+  return Value;
+}
+
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
unsigned ByScalar) const {
// Look for cases where a vector shift can use the *_BY_SCALAR form.
return lowerPREFETCH(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return lowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
case ISD::SHL:
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
case ISD::SRL:
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
- switch (Opcode) {
+ switch ((SystemZISD::NodeType)Opcode) {
+ case SystemZISD::FIRST_NUMBER: break;
OPCODE(RET_FLAG);
OPCODE(CALL);
OPCODE(SIBCALL);
OPCODE(PERMUTE_DWORDS);
OPCODE(PERMUTE);
OPCODE(PACK);
+ OPCODE(PACKS_CC);
+ OPCODE(PACKLS_CC);
+ OPCODE(UNPACK_HIGH);
+ OPCODE(UNPACKL_HIGH);
+ OPCODE(UNPACK_LOW);
+ OPCODE(UNPACKL_LOW);
OPCODE(VSHL_BY_SCALAR);
OPCODE(VSRL_BY_SCALAR);
OPCODE(VSRA_BY_SCALAR);
OPCODE(VICMPE);
OPCODE(VICMPH);
OPCODE(VICMPHL);
+ OPCODE(VICMPES);
+ OPCODE(VICMPHS);
+ OPCODE(VICMPHLS);
OPCODE(VFCMPE);
OPCODE(VFCMPH);
OPCODE(VFCMPHE);
+ OPCODE(VFCMPES);
+ OPCODE(VFCMPHS);
+ OPCODE(VFCMPHES);
+ OPCODE(VFTCI);
+ OPCODE(VEXTEND);
+ OPCODE(VROUND);
+ OPCODE(VTM);
+ OPCODE(VFAE_CC);
+ OPCODE(VFAEZ_CC);
+ OPCODE(VFEE_CC);
+ OPCODE(VFEEZ_CC);
+ OPCODE(VFENE_CC);
+ OPCODE(VFENEZ_CC);
+ OPCODE(VISTR_CC);
+ OPCODE(VSTRC_CC);
+ OPCODE(VSTRCZ_CC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
}
return Op;
} else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
- canTreatAsByteVector(Op.getValueType()) &&
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ canTreatAsByteVector(Op.getValueType()) &&
canTreatAsByteVector(Op.getOperand(0).getValueType())) {
// Make sure that only the unextended bits are significant.
EVT ExtVT = Op.getValueType();
unsigned SubByte = Byte % ExtBytesPerElement;
unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
if (SubByte < MinSubByte ||
- SubByte + BytesPerElement > ExtBytesPerElement)
- break;
+ SubByte + BytesPerElement > ExtBytesPerElement)
+ break;
// Get the byte offset of the unextended element
Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
// ...then add the byte offset relative to that element.
Byte += SubByte - MinSubByte;
if (Byte % BytesPerElement != 0)
- break;
+ break;
Op = Op.getOperand(0);
Index = Byte / BytesPerElement;
Force = true;
}
}
}
+ if (Opcode == SystemZISD::MERGE_HIGH ||
+ Opcode == SystemZISD::MERGE_LOW) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+ cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
+ // for v4f32.
+ if (Op1 == N->getOperand(0))
+ return Op1;
+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+ EVT VT = Op1.getValueType();
+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+ if (ElemBytes <= 4) {
+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+ EVT InVT = VT.changeVectorElementTypeToInteger();
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+ SystemZ::VectorBytes / ElemBytes / 2);
+ if (VT != InVT) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+ DCI.AddToWorklist(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ }
+ }
+ }
// If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
// for the extraction to be done on a vMiN value, so that we can use VSTE.
// If X has wider elements then convert it to:
N->getOperand(0) == N->getOperand(1))
return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
N->getOperand(0));
+ // (fround (extract_vector_elt X 0))
+ // (fround (extract_vector_elt X 1)) ->
+ // (extract_vector_elt (VROUND X) 0)
+ // (extract_vector_elt (VROUND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ if (Opcode == ISD::FP_ROUND) {
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f32 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+ SDValue OtherRound = SDValue(*U->use_begin(), 0);
+ if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+ OtherRound.getOperand(0) == SDValue(U, 0) &&
+ OtherRound.getValueType() == MVT::f32) {
+ SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
+ DCI.AddToWorklist(VRound.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+ VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+ VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ }
return SDValue();
}
return MBB;
}
+// Replace the pseudo instruction MI with an instruction of opcode Opcode
+// inserted before it.  The new instruction reads the register in MI's
+// operand 0 and defines a fresh virtual register of the same register
+// class.  MI is erased and MBB is returned.
+MachineBasicBlock *
+SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
+                                           MachineBasicBlock *MBB,
+                                           unsigned Opcode) const {
+  MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
+  const auto *InstrInfo =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  DebugLoc Loc = MI->getDebugLoc();
+
+  // The destination is a new virtual register in the source's class.
+  unsigned Src = MI->getOperand(0).getReg();
+  unsigned Dst = RegInfo.createVirtualRegister(RegInfo.getRegClass(Src));
+
+  // Emit the real instruction, which models the def as well, then drop
+  // the pseudo.
+  BuildMI(*MBB, MI, Loc, InstrInfo->get(Opcode), Dst).addReg(Src);
+  MI->eraseFromParent();
+
+  return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
case SystemZ::TBEGINC:
return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
+ case SystemZ::LTEBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
+ case SystemZ::LTDBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
+ case SystemZ::LTXBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+
default:
llvm_unreachable("Unexpected instr type to insert");
}