From 54786a0936bf0ba3d83be3e8fd32f1488ba9e709 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith"
Date: Fri, 28 Nov 2014 21:29:14 +0000
Subject: [PATCH] Revert "Masked Vector Load and Store Intrinsics."

This reverts commit r222632 (and follow-up r222636), which caused a host
of LNT failures on an internal bot.  I'll respond to the commit on the
list with a reproduction of one of the failures.

Conflicts:
	lib/Target/X86/X86TargetTransformInfo.cpp

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222936 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/TargetTransformInfo.h |   7 -
 include/llvm/CodeGen/ISDOpcodes.h           |   3 -
 include/llvm/CodeGen/SelectionDAG.h         |   4 -
 include/llvm/CodeGen/SelectionDAGNodes.h    |  68 --------
 include/llvm/IR/IRBuilder.h                 |  11 --
 include/llvm/IR/Intrinsics.h                |  11 +-
 include/llvm/IR/Intrinsics.td               |  15 --
 include/llvm/Target/TargetSelectionDAG.td   |  13 --
 lib/Analysis/TargetTransformInfo.cpp        |  11 --
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp    | 161 ------------------
 .../SelectionDAG/LegalizeIntegerTypes.cpp   |  23 ---
 lib/CodeGen/SelectionDAG/LegalizeTypes.h    |   4 -
 .../SelectionDAG/LegalizeVectorTypes.cpp    | 114 -------------
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp   |  54 ------
 .../SelectionDAG/SelectionDAGBuilder.cpp    |  70 --------
 .../SelectionDAG/SelectionDAGBuilder.h      |   2 -
 .../SelectionDAG/SelectionDAGDumper.cpp     |   2 -
 lib/IR/Function.cpp                         |  18 +-
 lib/IR/IRBuilder.cpp                        |  26 ---
 lib/IR/Verifier.cpp                         |  13 --
 lib/Target/X86/X86ISelLowering.cpp          |  22 +--
 lib/Target/X86/X86InstrAVX512.td            |  75 --------
 lib/Target/X86/X86InstrSSE.td               |  55 ------
 lib/Target/X86/X86TargetTransformInfo.cpp   |  18 --
 lib/Transforms/Vectorize/LoopVectorize.cpp  |  98 ++---------
 test/CodeGen/X86/masked_memop.ll            |  73 --------
 test/Transforms/LoopVectorize/X86/mask1.ll  |  83 ---------
 test/Transforms/LoopVectorize/X86/mask2.ll  |  84 ---------
 test/Transforms/LoopVectorize/X86/mask3.ll  |  84 ---------
 test/Transforms/LoopVectorize/X86/mask4.ll  |  83 ---------
 utils/TableGen/CodeGenTarget.cpp            |   3 +-
 utils/TableGen/IntrinsicEmitter.cpp         |  10 +-
 32 files changed, 27 insertions(+), 1291 deletions(-)
 delete mode 100644 test/CodeGen/X86/masked_memop.ll
 delete mode 100644 test/Transforms/LoopVectorize/X86/mask1.ll
 delete mode 100644 test/Transforms/LoopVectorize/X86/mask2.ll
 delete mode 100644 test/Transforms/LoopVectorize/X86/mask3.ll
 delete mode 100644 test/Transforms/LoopVectorize/X86/mask4.ll

diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 8af8f77ad67..9acaaa6f2eb 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -270,13 +270,6 @@ public:
                                  int64_t BaseOffset, bool HasBaseReg,
                                  int64_t Scale) const;
 
-  /// \brief Return true if the target works with masked instructions.
-  /// AVX2 allows masks for consecutive load and store for i32 and i64
-  /// elements. The AVX-512 architecture will also allow masks for
-  /// non-consecutive memory accesses.
-  virtual bool isLegalPredicatedStore(Type *DataType, int Consecutive) const;
-  virtual bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const;
-
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
   /// of the specified type.
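For context on the hunk above: the two hooks it removes are how a transform asks the target whether a predicated (masked) memory access of a given type and stride is legal before emitting one. The sketch below is a minimal illustration of that query pattern, not code from this patch — `canUseMaskedStore` is a hypothetical helper; only the `isLegalPredicatedStore` signature comes from the hunk above, and the 0/±1 meaning of `Consecutive` follows the vectorizer's `isConsecutivePtr` convention used later in this patch.

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    // Hypothetical helper: gate emission of a masked store on the TTI hook
    // removed above. Consecutive follows the vectorizer's convention:
    // 0 = not consecutive, 1 = forward unit stride, -1 = reverse unit stride.
    static bool canUseMaskedStore(const TargetTransformInfo &TTI,
                                  Type *DataType, int Consecutive) {
      // When the target reports the masked store is not legal (e.g. pre-AVX2
      // x86, per the X86TTI override later in this patch), the caller must
      // fall back to scalarized, predicated stores.
      return TTI.isLegalPredicatedStore(DataType, Consecutive);
    }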
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 4f55ccac16e..bbf0ad30458 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -675,9 +675,6 @@ namespace ISD {
     ATOMIC_LOAD_UMIN, ATOMIC_LOAD_UMAX,
 
-    // Masked load and store
-    MLOAD, MSTORE,
-
     /// This corresponds to the llvm.lifetime.* intrinsics. The first operand
     /// is the chain and the second operand is the alloca pointer.
     LIFETIME_START, LIFETIME_END,

diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 4950797bb1e..fbdaf0d7fd2 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -866,10 +866,6 @@ public:
   SDValue getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
                           SDValue Offset, ISD::MemIndexedMode AM);
 
-  SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
-                        SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
-  SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
-                         SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
 
   /// getSrcValue - Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);

diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index acd178892d0..47158272412 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1177,8 +1177,6 @@ public:
            N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
            N->getOpcode() == ISD::ATOMIC_LOAD      ||
            N->getOpcode() == ISD::ATOMIC_STORE     ||
-           N->getOpcode() == ISD::MLOAD            ||
-           N->getOpcode() == ISD::MSTORE           ||
           N->isMemIntrinsic() ||
           N->isTargetMemoryOpcode();
  }
@@ -1928,72 +1926,6 @@ public:
   }
 };
 
-/// MaskedLoadStoreSDNode - This is a base class used to represent MLOAD and
-/// MSTORE nodes
-///
-class MaskedLoadStoreSDNode : public MemSDNode {
-  // Operands
-  SDUse Ops[4];
-public:
-  friend class SelectionDAG;
-  MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl,
-                        SDValue *Operands, unsigned numOperands,
-                        SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
-    : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
-    InitOperands(Ops, Operands, numOperands);
-  }
-
-  // In both nodes the address is Op1 and the mask is Op2:
-  // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
-  // MaskedStoreSDNode (Chain, ptr, mask, data)
-  // Mask is a vector of i1 elements
-  const SDValue &getBasePtr() const { return getOperand(1); }
-  const SDValue &getMask() const    { return getOperand(2); }
-
-  static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::MLOAD ||
-           N->getOpcode() == ISD::MSTORE;
-  }
-};
-
-/// MaskedLoadSDNode - This class is used to represent an MLOAD node
-///
-class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
-public:
-  friend class SelectionDAG;
-  MaskedLoadSDNode(unsigned Order, DebugLoc dl,
-                   SDValue *Operands, unsigned numOperands,
-                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
-    : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
-                            VTs, MemVT, MMO)
-  {}
-
-  const SDValue &getSrc0() const { return getOperand(3); }
-  static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::MLOAD;
-  }
-};
-
-/// MaskedStoreSDNode - This class is used to represent an MSTORE node
-///
-class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
-
-public:
-  friend class SelectionDAG;
-  MaskedStoreSDNode(unsigned Order, DebugLoc dl,
-                    SDValue *Operands, unsigned numOperands,
-                    SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
-    : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
-                            VTs, MemVT, MMO)
-  {}
-
-  const SDValue &getData() const { return getOperand(3); }
-
-  static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::MSTORE;
-  }
-};
-
 /// MachineSDNode - An SDNode that represents everything that will be needed
 /// to construct a MachineInstr. These nodes are created during the
 /// instruction selection proper phase.

diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index e564ca52adf..80ada57f4c8 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -429,22 +429,11 @@ public:
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr);
 
-  /// \brief Create a call to the Masked Load intrinsic
-  CallInst *CreateMaskedLoad(ArrayRef<Value *> Ops);
-
-  /// \brief Create a call to the Masked Store intrinsic
-  CallInst *CreateMaskedStore(ArrayRef<Value *> Ops);
-
   /// \brief Create an assume intrinsic call that allows the optimizer to
   /// assume that the provided condition will be true.
   CallInst *CreateAssumption(Value *Cond);
 
 private:
-  /// \brief Create a call to a masked intrinsic with the given Id.
-  /// A masked intrinsic has only one overloaded type - the data type.
-  CallInst *CreateMaskedIntrinsic(unsigned Id, ArrayRef<Value *> Ops,
-                                  Type *DataTy);
-
   Value *getCastedInt8PtrValue(Value *Ptr);
 };

diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index 5236f195140..acc0e9e5d37 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -76,8 +76,7 @@ namespace Intrinsic {
     enum IITDescriptorKind {
       Void, VarArg, MMX, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
-      Argument, ExtendArgument, TruncArgument, HalfVecArgument,
-      SameVecWidthArgument
+      Argument, ExtendArgument, TruncArgument, HalfVecArgument
     } Kind;
 
     union {
@@ -97,15 +96,13 @@ namespace Intrinsic {
     };
     unsigned getArgumentNumber() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
-             Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument);
+             Kind == TruncArgument || Kind == HalfVecArgument);
       return Argument_Info >> 2;
     }
     ArgKind getArgumentKind() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
-             Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument);
-      return (ArgKind)(Argument_Info & 3);
+             Kind == TruncArgument || Kind == HalfVecArgument);
+      return (ArgKind)(Argument_Info&3);
     }
 
     static IITDescriptor get(IITDescriptorKind K, unsigned Field) {

diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 1344c0c6e19..98d48de5103 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -112,10 +112,6 @@ class LLVMMatchType<int num>
 // the intrinsic is overloaded, so the matched type should be declared as iAny.
 class LLVMExtendedType<int num> : LLVMMatchType<num>;
 class LLVMTruncatedType<int num> : LLVMMatchType<num>;
-class LLVMVectorSameWidth<int num, LLVMType elty>
-  : LLVMMatchType<num> {
-  ValueType ElTy = elty.VT;
-}
 
 // Match the type of another intrinsic parameter that is expected to be a
 // vector type, but change the element count to be half as many
@@ -543,17 +539,6 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
 def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                                 [], "llvm.clear_cache">;
 
-//===-------------------------- Masked Intrinsics -------------------------===//
-//
-def int_masked_store : Intrinsic<[], [llvm_ptr_ty, llvm_anyvector_ty,
-                                      llvm_i32_ty,
-                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
-                                 [IntrReadWriteArgMem]>;
-
-def int_masked_load : Intrinsic<[llvm_anyvector_ty],
-                                [llvm_ptr_ty, LLVMMatchType<0>, llvm_i32_ty,
-                                 LLVMVectorSameWidth<0, llvm_i1_ty>],
-                                [IntrReadArgMem]>;
-
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//

diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 907baa1b9b1..f63afd70983 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -188,14 +188,6 @@ def SDTIStore : SDTypeProfile<1, 3, [  // indexed store
   SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
 ]>;
 
-def SDTMaskedStore: SDTypeProfile<0, 3, [  // masked store
-  SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>
-]>;
-
-def SDTMaskedLoad: SDTypeProfile<1, 3, [  // masked load
-  SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>
-]>;
-
 def SDTVecShuffle : SDTypeProfile<1, 2, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
 ]>;
@@ -462,11 +454,6 @@ def atomic_load      : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
 def atomic_store     : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
                     [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
-                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_load  : SDNode<"ISD::MLOAD", SDTMaskedLoad,
-                       [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
 // and truncst (see below).
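The two masked-intrinsic definitions deleted above fix the operand semantics: `int_masked_store` writes only the lanes whose `i1` mask bit is set, and `int_masked_load` yields the passthru operand (`LLVMMatchType<0>`) in masked-off lanes — that passthru is why the AVX2 selection patterns later in this patch blend the `VPMASKMOV` result with `$src0`. As a guard against misreading the TableGen, here is a plain C++ scalar reference model of those semantics; it is illustrative only and not part of the patch:

    #include <cstddef>

    // Scalar model of llvm.masked.store: lanes with a false mask bit never
    // touch memory (no write, no fault).
    void maskedStoreRef(int *Ptr, const bool *Mask, const int *Val,
                        std::size_t N) {
      for (std::size_t I = 0; I != N; ++I)
        if (Mask[I])
          Ptr[I] = Val[I];
    }

    // Scalar model of llvm.masked.load: lanes with a false mask bit take the
    // passthru value instead of reading memory.
    void maskedLoadRef(int *Dst, const int *Ptr, const bool *Mask,
                       const int *Passthru, std::size_t N) {
      for (std::size_t I = 0; I != N; ++I)
        Dst[I] = Mask[I] ? Ptr[I] : Passthru[I];
    }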
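At the IR level these intrinsics are reached through the `IRBuilder` helpers this patch also deletes (see the IRBuilder.h and IRBuilder.cpp hunks). Below is a minimal sketch of building a masked load call, mirroring the operand order above — pointer, passthru, alignment, mask — and the exact `Ops` sequence used by the deleted LoopVectorize code further down; the wrapper name and variables are hypothetical. The `ld`/`st` node definitions of the diff resume right after this block.

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Hypothetical wrapper around the (deleted) IRBuilder helper.
    static Value *emitMaskedLoad(IRBuilder<> &Builder, Value *I8Ptr,
                                 Value *Mask, Type *VecTy, unsigned Align) {
      SmallVector<Value *, 4> Ops;
      Ops.push_back(I8Ptr);                   // llvm_ptr_ty: base address
      Ops.push_back(UndefValue::get(VecTy));  // LLVMMatchType<0>: passthru
      Ops.push_back(Builder.getInt32(Align)); // llvm_i32_ty: alignment
      Ops.push_back(Mask);                    // LLVMVectorSameWidth<0, i1>: mask
      // CreateMaskedLoad overloads llvm.masked.load on the passthru type.
      return Builder.CreateMaskedLoad(Ops);
    }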
 def ld : SDNode<"ISD::LOAD"       , SDTLoad,

diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index e09e5f8c28b..c1ffb9daefa 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -101,17 +101,6 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
   return PrevTTI->isLegalICmpImmediate(Imm);
 }
 
-bool TargetTransformInfo::isLegalPredicatedLoad(Type *DataType,
-                                                int Consecutive) const {
-  return false;
-}
-
-bool TargetTransformInfo::isLegalPredicatedStore(Type *DataType,
-                                                 int Consecutive) const {
-  return false;
-}
-
-
 bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                 int64_t BaseOffset,
                                                 bool HasBaseReg,

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2cf5e753966..201429fe754 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -303,8 +303,6 @@ namespace {
     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     SDValue visitVECTOR_SHUFFLE(SDNode *N);
     SDValue visitINSERT_SUBVECTOR(SDNode *N);
-    SDValue visitMLOAD(SDNode *N);
-    SDValue visitMSTORE(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -414,7 +412,6 @@ namespace {
     EVT getSetCCResultType(EVT VT) const {
       return TLI.getSetCCResultType(*DAG.getContext(), VT);
     }
-    int& MLD();
   };
 }
@@ -1354,8 +1351,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
   case ISD::VECTOR_SHUFFLE:    return visitVECTOR_SHUFFLE(N);
   case ISD::INSERT_SUBVECTOR:  return visitINSERT_SUBVECTOR(N);
-  case ISD::MLOAD:             return visitMLOAD(N);
-  case ISD::MSTORE:            return visitMSTORE(N);
   }
   return SDValue();
 }
@@ -4776,162 +4771,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
                      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
 }
 
-SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
-  SDValue Mask = MST->getMask();
-  SDValue Data = MST->getData();
-  SDLoc DL(N);
-
-  // If the MSTORE data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() == ISD::SETCC) {
-
-    // Check if any splitting is required.
-    if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
-        TargetLowering::TypeSplitVector)
-      return SDValue();
-
-    SDValue MaskLo, MaskHi, Lo, Hi;
-    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
-
-    SDValue Chain = MST->getChain();
-    SDValue Ptr   = MST->getBasePtr();
-
-    EVT MemoryVT = MST->getMemoryVT();
-    unsigned Alignment = MST->getOriginalAlignment();
-
-    // if Alignment is equal to the vector size,
-    // take the half of it for the second part
-    unsigned SecondHalfAlignment =
-      (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
-         Alignment/2 : Alignment;
-
-    EVT LoMemVT, HiMemVT;
-    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-    SDValue DataLo, DataHi;
-    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
-    MachineMemOperand *MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MST->getPointerInfo(),
-                           MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
-                           Alignment, MST->getAAInfo(), MST->getRanges());
-
-    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
-
-    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, Ptr.getValueType()));
-
-    MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MST->getPointerInfo(),
-                           MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
-                           SecondHalfAlignment, MST->getAAInfo(),
-                           MST->getRanges());
-
-    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
-
-    AddToWorklist(Lo.getNode());
-    AddToWorklist(Hi.getNode());
-
-    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
-  }
-  return SDValue();
-}
-
-SDValue DAGCombiner::visitMLOAD(SDNode *N) {
-
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
-  SDValue Mask = MLD->getMask();
-  SDLoc DL(N);
-
-  // If the MLOAD result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-
-  if (Mask.getOpcode() == ISD::SETCC) {
-    EVT VT = N->getValueType(0);
-
-    // Check if any splitting is required.
-    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-        TargetLowering::TypeSplitVector)
-      return SDValue();
-
-    SDValue MaskLo, MaskHi, Lo, Hi;
-    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-    SDValue Src0 = MLD->getSrc0();
-    SDValue Src0Lo, Src0Hi;
-    std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
-
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
-    SDValue Chain = MLD->getChain();
-    SDValue Ptr   = MLD->getBasePtr();
-    EVT MemoryVT = MLD->getMemoryVT();
-    unsigned Alignment = MLD->getOriginalAlignment();
-
-    // if Alignment is equal to the vector size,
-    // take the half of it for the second part
-    unsigned SecondHalfAlignment =
-      (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
-         Alignment/2 : Alignment;
-
-    EVT LoMemVT, HiMemVT;
-    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-    MachineMemOperand *MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MLD->getPointerInfo(),
-                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
-                           Alignment, MLD->getAAInfo(), MLD->getRanges());
-
-    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
-
-    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, Ptr.getValueType()));
-
-    MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MLD->getPointerInfo(),
-                           MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
-                           SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
-
-    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
-
-    AddToWorklist(Lo.getNode());
-    AddToWorklist(Hi.getNode());
-
-    // Build a factor node to remember that this load is independent of the
-    // other one.
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
-                        Hi.getValue(1));
-
-    // Legalized the chain result - switch anything that used the old chain to
-    // use the new one.
-    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
-
-    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
-    SDValue RetOps[] = { LoadRes, Chain };
-    return DAG.getMergeValues(RetOps, DL);
-  }
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);

diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 52c2d1be430..b73bb0a897b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -825,10 +825,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::SINT_TO_FP:   Res = PromoteIntOp_SINT_TO_FP(N); break;
   case ISD::STORE:        Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
                                                    OpNo); break;
-  case ISD::MSTORE:       Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
-                                                    OpNo); break;
-  case ISD::MLOAD:        Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
-                                                   OpNo); break;
   case ISD::TRUNCATE:     Res = PromoteIntOp_TRUNCATE(N); break;
   case ISD::FP16_TO_FP:
   case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1095,25 +1091,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
                            N->getMemoryVT(), N->getMemOperand());
 }
 
-SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
-
-  assert(OpNo == 2 && "Only know how to promote the mask!");
-  EVT DataVT = N->getOperand(3).getValueType();
-  SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
-  SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
-  NewOps[OpNo] = Mask;
-  return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
-}
-
-SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
-  assert(OpNo == 2 && "Only know how to promote the mask!");
-  EVT DataVT = N->getValueType(0);
-  SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
-  SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
-  NewOps[OpNo] = Mask;
-  return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
-}
-
 SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
   SDValue Op = GetPromotedInteger(N->getOperand(0));
   return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);

diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 805b0fc0463..30f412ba317 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -285,8 +285,6 @@ private:
   SDValue PromoteIntOp_TRUNCATE(SDNode *N);
   SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
   SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
-  SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
-  SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
 
   void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
 
@@ -580,7 +578,6 @@ private:
   void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
-  void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -597,7 +594,6 @@ private:
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
-  SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue SplitVecOp_TRUNCATE(SDNode *N);
   SDValue SplitVecOp_VSETCC(SDNode *N);

diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 88f67370228..27f63d27823 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -597,9 +597,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
     break;
-  case ISD::MLOAD:
-    SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
-    break;
   case ISD::SETCC:
     SplitVecRes_SETCC(N, Lo, Hi);
     break;
@@ -982,64 +979,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
   ReplaceValueWith(SDValue(LD, 1), Ch);
 }
 
-void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
-                                         SDValue &Lo, SDValue &Hi) {
-  EVT LoVT, HiVT;
-  SDLoc dl(MLD);
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
-  SDValue Ch = MLD->getChain();
-  SDValue Ptr = MLD->getBasePtr();
-  SDValue Mask = MLD->getMask();
-  unsigned Alignment = MLD->getOriginalAlignment();
-
-  // if Alignment is equal to the vector size,
-  // take the half of it for the second part
-  unsigned SecondHalfAlignment =
-    (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
-       Alignment/2 : Alignment;
-
-  SDValue MaskLo, MaskHi;
-  std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
-
-  EVT MemoryVT = MLD->getMemoryVT();
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue Src0 = MLD->getSrc0();
-  SDValue Src0Lo, Src0Hi;
-  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MLD->getPointerInfo(),
-                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
-                         Alignment, MLD->getAAInfo(), MLD->getRanges());
-
-  Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
-
-  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
-
-  MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MLD->getPointerInfo(),
-                         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
-                         SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
-
-  Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
-
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
-                   Hi.getValue(1));
-
-  // Legalized the chain result - switch anything that used the old chain to
-  // use the new one.
-  ReplaceValueWith(SDValue(MLD, 1), Ch);
-
-}
-
 void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
@@ -1295,9 +1234,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::STORE:
       Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
       break;
-    case ISD::MSTORE:
-      Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
-      break;
     case ISD::VSELECT:
       Res = SplitVecOp_VSELECT(N, OpNo);
       break;
@@ -1459,56 +1395,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
                          MachinePointerInfo(), EltVT, false, false, false, 0);
 }
 
-SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
-                                            unsigned OpNo) {
-  SDValue Ch  = N->getChain();
-  SDValue Ptr = N->getBasePtr();
-  SDValue Mask = N->getMask();
-  SDValue Data = N->getData();
-  EVT MemoryVT = N->getMemoryVT();
-  unsigned Alignment = N->getOriginalAlignment();
-  SDLoc DL(N);
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue DataLo, DataHi;
-  GetSplitVector(Data, DataLo, DataHi);
-  SDValue MaskLo, MaskHi;
-  GetSplitVector(Mask, MaskLo, MaskHi);
-
-  // if Alignment is equal to the vector size,
-  // take the half of it for the second part
-  unsigned SecondHalfAlignment =
-    (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
-       Alignment/2 : Alignment;
-
-  SDValue Lo, Hi;
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(N->getPointerInfo(),
-                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
-                         Alignment, N->getAAInfo(), N->getRanges());
-
-  Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
-
-  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
-
-  MMO = DAG.getMachineFunction().
-    getMachineMemOperand(N->getPointerInfo(),
-                         MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
-                         SecondHalfAlignment, N->getAAInfo(), N->getRanges());
-
-  Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
-
-
-  // Build a factor node to remember that this store is independent of the
-  // other one.
-  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
-
-}
-
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   assert(N->isUnindexed() && "Indexed store of vector?");
   assert(OpNo == 1 && "Can only split the stored value");

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 57ec81bba4a..7961e66d8c8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4917,60 +4917,6 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
   return SDValue(N, 0);
 }
 
-SDValue
-SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
-                            SDValue Ptr, SDValue Mask, SDValue Src0,
-                            MachineMemOperand *MMO) {
-
-  SDVTList VTs = getVTList(VT, MVT::Other);
-  SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
-  FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
-  ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
-                                     MMO->isVolatile(),
-                                     MMO->isNonTemporal(),
-                                     MMO->isInvariant()));
-  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = nullptr;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
-    cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
-    return SDValue(E, 0);
-  }
-  SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
-                                                   dl.getDebugLoc(), Ops, 4, VTs,
-                                                   VT, MMO);
-  CSEMap.InsertNode(N, IP);
-  InsertNode(N);
-  return SDValue(N, 0);
-}
-
-SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
-                                     SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
-  assert(Chain.getValueType() == MVT::Other &&
-         "Invalid chain type");
-  EVT VT = Val.getValueType();
-  SDVTList VTs = getVTList(MVT::Other);
-  SDValue Ops[] = { Chain, Ptr, Mask, Val };
-  FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
-  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
-                                     MMO->isNonTemporal(), MMO->isInvariant()));
-  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = nullptr;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
-    cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
-    return SDValue(E, 0);
-  }
-  SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
-                                                    dl.getDebugLoc(), Ops, 4,
-                                                    VTs, VT, MMO);
-  CSEMap.InsertNode(N, IP);
-  InsertNode(N);
-  return SDValue(N, 0);
-}
-
 SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
                                SDValue Chain, SDValue Ptr,
                                SDValue SV,

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8618ab212ee..8f582f1aeb6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3613,70 +3613,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   DAG.setRoot(StoreNode);
 }
 
-void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
-  SDLoc sdl = getCurSDLoc();
-
-  Value *PtrOperand = I.getArgOperand(0);
-  SDValue Ptr = getValue(PtrOperand);
-  SDValue Src0 = getValue(I.getArgOperand(1));
-  SDValue Mask = getValue(I.getArgOperand(3));
-  EVT VT = Src0.getValueType();
-  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
-  if (!Alignment)
-    Alignment = DAG.getEVTAlignment(VT);
-
-  AAMDNodes AAInfo;
-  I.getAAMetadata(AAInfo);
-
-  MachineMemOperand *MMO =
-    DAG.getMachineFunction().
-    getMachineMemOperand(MachinePointerInfo(PtrOperand),
-                         MachineMemOperand::MOStore, VT.getStoreSize(),
-                         Alignment, AAInfo);
-  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
-  DAG.setRoot(StoreNode);
-  setValue(&I, StoreNode);
-}
-
-void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
-  SDLoc sdl = getCurSDLoc();
-
-  Value *PtrOperand = I.getArgOperand(0);
-  SDValue Ptr = getValue(PtrOperand);
-  SDValue Src0 = getValue(I.getArgOperand(1));
-  SDValue Mask = getValue(I.getArgOperand(3));
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT VT = TLI.getValueType(I.getType());
-  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
-  if (!Alignment)
-    Alignment = DAG.getEVTAlignment(VT);
-
-  AAMDNodes AAInfo;
-  I.getAAMetadata(AAInfo);
-  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
-
-  SDValue InChain = DAG.getRoot();
-  if (AA->pointsToConstantMemory(
-      AliasAnalysis::Location(PtrOperand,
-                              AA->getTypeStoreSize(I.getType()),
-                              AAInfo))) {
-    // Do not serialize (non-volatile) loads of constant memory with anything.
-    InChain = DAG.getEntryNode();
-  }
-
-  MachineMemOperand *MMO =
-    DAG.getMachineFunction().
-    getMachineMemOperand(MachinePointerInfo(PtrOperand),
-                         MachineMemOperand::MOLoad, VT.getStoreSize(),
-                         Alignment, AAInfo, Ranges);
-
-  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
-  SDValue OutChain = Load.getValue(1);
-  DAG.setRoot(OutChain);
-  setValue(&I, Load);
-}
-
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
   AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -4978,12 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
-  case Intrinsic::masked_load:
-    visitMaskedLoad(I);
-    return nullptr;
-  case Intrinsic::masked_store:
-    visitMaskedStore(I);
-    return nullptr;
   case Intrinsic::x86_mmx_pslli_w:
   case Intrinsic::x86_mmx_pslli_d:
   case Intrinsic::x86_mmx_pslli_q:

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 2637e07a49b..f74e6525b0c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -756,8 +756,6 @@ private:
   void visitAlloca(const AllocaInst &I);
   void visitLoad(const LoadInst &I);
   void visitStore(const StoreInst &I);
-  void visitMaskedLoad(const CallInst &I);
-  void visitMaskedStore(const CallInst &I);
   void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
   void visitAtomicRMW(const AtomicRMWInst &I);
   void visitFence(const FenceInst &I);

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index e8577d898c2..c9f6cff9155 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -269,8 +269,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   // Other operators
   case ISD::LOAD:                       return "load";
   case ISD::STORE:                      return "store";
-  case ISD::MLOAD:                      return "masked_load";
-  case ISD::MSTORE:                     return "masked_store";
   case ISD::VAARG:                      return "vaarg";
   case ISD::VACOPY:                     return "vacopy";
   case ISD::VAEND:                      return "vaend";

diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index fbf94b364af..b53f6f314ee 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -546,8 +546,7 @@ enum IIT_Info {
   IIT_ANYPTR = 26,
   IIT_V1   = 27,
   IIT_VARARG = 28,
-  IIT_HALF_VEC_ARG = 29,
-  IIT_SAME_VEC_WIDTH_ARG = 30
+  IIT_HALF_VEC_ARG = 29
 };
 
@@ -655,12 +654,6 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
                                              ArgInfo));
     return;
   }
-  case IIT_SAME_VEC_WIDTH_ARG: {
-    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
-    OutputTable.push_back(IITDescriptor::get(IITDescriptor::SameVecWidthArgument,
-                                             ArgInfo));
-    return;
-  }
   case IIT_EMPTYSTRUCT:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
     return;
@@ -768,14 +761,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
   case IITDescriptor::HalfVecArgument:
     return VectorType::getHalfElementsVectorType(cast<VectorType>(
                                                   Tys[D.getArgumentNumber()]));
-  case IITDescriptor::SameVecWidthArgument:
-    Type *EltTy = DecodeFixedType(Infos, Tys, Context);
-    Type *Ty = Tys[D.getArgumentNumber()];
-    if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
-      return VectorType::get(EltTy, VTy->getNumElements());
-    }
-    llvm_unreachable("unhandled");
-  }
+  }
   llvm_unreachable("unhandled");
 }

diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 5f63ded48fe..a4c5d9766a2 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -183,29 +183,3 @@ CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
   return createCallHelper(FnAssume, Ops, this);
 }
 
-/// Create a call to a Masked Load intrinsic.
-/// Ops - an array of operands.
-CallInst *IRBuilderBase::CreateMaskedLoad(ArrayRef<Value *> Ops) {
-  // The only one overloaded type - the type of passthru value in this case
-  Type *DataTy = Ops[1]->getType();
-  return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, DataTy);
-}
-
-/// Create a call to a Masked Store intrinsic.
-/// Ops - an array of operands.
-CallInst *IRBuilderBase::CreateMaskedStore(ArrayRef<Value *> Ops) {
-  // DataTy - type of the data to be stored - the only one overloaded type
-  Type *DataTy = Ops[1]->getType();
-  return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, DataTy);
-}
-
-/// Create a call to a Masked intrinsic, with given intrinsic Id,
-/// an array of operands - Ops, and one overloaded type - DataTy
-CallInst *IRBuilderBase::CreateMaskedIntrinsic(unsigned Id,
-                                               ArrayRef<Value *> Ops,
-                                               Type *DataTy) {
-  Module *M = BB->getParent()->getParent();
-  Type *OverloadedTypes[] = { DataTy };
-  Value *TheFn = Intrinsic::getDeclaration(M, (Intrinsic::ID)Id, OverloadedTypes);
-  return createCallHelper(TheFn, Ops, this);
-}

diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index b3ca756b87a..9698dbd77fd 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -2405,19 +2405,6 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
            !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
            VectorType::getHalfElementsVectorType(
                cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
-    case IITDescriptor::SameVecWidthArgument: {
-      if (D.getArgumentNumber() >= ArgTys.size())
-        return true;
-      VectorType * ReferenceType =
-        dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
-      VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
-      if (!ThisArgType || !ReferenceType ||
-          (ReferenceType->getVectorNumElements() !=
-           ThisArgType->getVectorNumElements()))
-        return true;
-      return VerifyIntrinsicType(ThisArgType->getVectorElementType(),
-                                 Infos, ArgTys);
-    }
   }
   llvm_unreachable("unhandled");
 }

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index edd3f37c133..b67bbcb39c3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1321,21 +1321,13 @@ void X86TargetLowering::resetOperationActions() {
       // Extract subvector is special because the value type
       // (result) is 128-bit but the source is 256-bit wide.
- if (VT.is128BitVector()) { - if (VT.getScalarSizeInBits() >= 32) { - setOperationAction(ISD::MLOAD, VT, Custom); - setOperationAction(ISD::MSTORE, VT, Custom); - } + if (VT.is128BitVector()) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - } + // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; - if (VT.getScalarSizeInBits() >= 32) { - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - } setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); @@ -1502,13 +1494,9 @@ void X86TargetLowering::resetOperationActions() { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); // Extract subvector is special because the value type // (result) is 256/128-bit but the source is 512-bit wide. - if (VT.is128BitVector() || VT.is256BitVector()) { + if (VT.is128BitVector() || VT.is256BitVector()) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - if ( EltSize >= 32) { - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - } - } + if (VT.getVectorElementType() == MVT::i1) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); @@ -1524,8 +1512,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); } } for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5c33c39c418..01e9ce7078f 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2122,41 +2122,6 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src), (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), - (VMOVUPSZmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - -def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)), - (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; - -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)), - (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)), - (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, - (bc_v16f32 (v16i32 immAllZerosV)))), - (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))), - (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, - (bc_v8f64 (v16i32 immAllZerosV)))), - (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))), - (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; - defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32", "16", "8", "4", 
SSEPackedInt, HasAVX512>, avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", @@ -2231,46 +2196,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))), - (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)), - (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))), - (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, - (bc_v8i64 (v16i32 immAllZerosV)))), - (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; - -def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))), - (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; - -def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)), - (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; - -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)), - (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; - -// SKX replacement -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>; - -// KNL replacement -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Zmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - - // Move Int Doubleword to Packed Double Int // def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 38a7838f685..1b07e874837 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -9260,61 +9260,6 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q, int_x86_avx2_maskstore_q_256>, VEX_W; -def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), - (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), - (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), - (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), - (bc_v8f32 (v8i32 immAllZerosV)))), - (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))), - (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr), - VR256:$mask)>; - -def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), - (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))), - (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; - -def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))), - (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr), - VR256:$mask)>; - -def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), - (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>; - -def: Pat<(masked_store addr:$ptr, 
addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
-         (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-
-def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
-         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
-                             (v4f64 immAllZerosV))),
-         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
-         (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
-                       VR256:$mask)>;
-
-def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
-         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
-                             (bc_v4i64 (v8i32 immAllZerosV)))),
-         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
-         (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
-                       VR256:$mask)>;
-
 //===----------------------------------------------------------------------===//
 // Variable Bit Shifts

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 432cedaa803..2b70fd0ecf8 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -111,8 +111,6 @@ public:
                          Type *Ty) const override;
   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) const override;
-  bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const override;
-  bool isLegalPredicatedStore(Type *DataType, int Consecutive) const override;
 
   /// @}
 };
@@ -1158,19 +1156,3 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   }
   return X86TTI::getIntImmCost(Imm, Ty);
 }
-
-bool X86TTI::isLegalPredicatedLoad(Type *DataType, int Consecutive) const {
-  int ScalarWidth = DataType->getScalarSizeInBits();
-
-  // Todo: AVX512 allows gather/scatter, works with strided and random as well
-  if ((ScalarWidth < 32) || (Consecutive == 0))
-    return false;
-  if (ST->hasAVX512() || ST->hasAVX2())
-    return true;
-  return false;
-}
-
-bool X86TTI::isLegalPredicatedStore(Type *DataType, int Consecutive) const {
-  return isLegalPredicatedLoad(DataType, Consecutive);
-}
-

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index de4cb262575..35b2ecf99ce 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -580,10 +580,9 @@ public:
   LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
                             DominatorTree *DT, TargetLibraryInfo *TLI,
-                            AliasAnalysis *AA, Function *F,
-                            const TargetTransformInfo *TTI)
+                            AliasAnalysis *AA, Function *F)
       : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
-        DT(DT), TLI(TLI), AA(AA), TheFunction(F), TTI(TTI), Induction(nullptr),
+        DT(DT), TLI(TLI), AA(AA), TheFunction(F), Induction(nullptr),
         WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
   }
@@ -769,15 +768,6 @@ public:
   }
   SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
 
-  bool canPredicateStore(Type *DataType, Value *Ptr) {
-    return TTI->isLegalPredicatedStore(DataType, isConsecutivePtr(Ptr));
-  }
-  bool canPredicateLoad(Type *DataType, Value *Ptr) {
-    return TTI->isLegalPredicatedLoad(DataType, isConsecutivePtr(Ptr));
-  }
-  bool setMaskedOp(const Instruction* I) {
-    return (MaskedOp.find(I) != MaskedOp.end());
-  }
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
   /// and we only need to check individual instructions.
@@ -850,8 +840,6 @@ private:
   AliasAnalysis *AA;
   /// Parent function
   Function *TheFunction;
-  /// Target Transform Info
-  const TargetTransformInfo *TTI;
 
   // --- vectorization state --- //
 
@@ -883,10 +871,6 @@ private:
   ValueToValueMap Strides;
   SmallPtrSet<Value *, 8> StrideSet;
-
-  /// While vectorizing these instructions we have to generate a
-  /// call to an appropriate masked intrinsic
-  std::set<const Instruction*> MaskedOp;
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1391,7 +1375,7 @@ struct LoopVectorize : public FunctionPass {
     }
 
     // Check if it is legal to vectorize the loop.
-    LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
+    LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F);
     if (!LVL.canVectorize()) {
       DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
       emitMissedWarning(F, L, Hints);
@@ -1779,8 +1763,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
   unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
 
-  if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
-      !Legal->setMaskedOp(SI))
+  if (SI && Legal->blockNeedsPredication(SI->getParent()))
     return scalarizeInstruction(Instr, true);
 
   if (ScalarAllocatedSize != VectorElementSize)
@@ -1874,25 +1857,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
       Value *VecPtr = Builder.CreateBitCast(PartPtr,
                                             DataTy->getPointerTo(AddressSpace));
-
-      Instruction *NewSI;
-      if (Legal->setMaskedOp(SI)) {
-        Type *I8PtrTy =
-          Builder.getInt8PtrTy(PartPtr->getType()->getPointerAddressSpace());
-
-        Value *I8Ptr = Builder.CreateBitCast(PartPtr, I8PtrTy);
-
-        VectorParts Cond = createEdgeMask(SI->getParent()->getSinglePredecessor(),
-                                          SI->getParent());
-        SmallVector<Value *, 8> Ops;
-        Ops.push_back(I8Ptr);
-        Ops.push_back(StoredVal[Part]);
-        Ops.push_back(Builder.getInt32(Alignment));
-        Ops.push_back(Cond[Part]);
-        NewSI = Builder.CreateMaskedStore(Ops);
-      }
-      else
-        NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
+      StoreInst *NewSI =
+        Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
       propagateMetadata(NewSI, SI);
     }
     return;
@@ -1907,31 +1873,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
     if (Reverse) {
       // If the address is consecutive but reversed, then the
-      // wide load needs to start at the last vector element.
+      // wide store needs to start at the last vector element.
       PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
      PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
    }
 
-    Instruction* NewLI;
-    if (Legal->setMaskedOp(LI)) {
-      Type *I8PtrTy =
-        Builder.getInt8PtrTy(PartPtr->getType()->getPointerAddressSpace());
-
-      Value *I8Ptr = Builder.CreateBitCast(PartPtr, I8PtrTy);
-
-      VectorParts SrcMask = createBlockInMask(LI->getParent());
-      SmallVector<Value *, 8> Ops;
-      Ops.push_back(I8Ptr);
-      Ops.push_back(UndefValue::get(DataTy));
-      Ops.push_back(Builder.getInt32(Alignment));
-      Ops.push_back(SrcMask[Part]);
-      NewLI = Builder.CreateMaskedLoad(Ops);
-    }
-    else {
-      Value *VecPtr = Builder.CreateBitCast(PartPtr,
-                                            DataTy->getPointerTo(AddressSpace));
-      NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
-    }
+    Value *VecPtr = Builder.CreateBitCast(PartPtr,
+                                          DataTy->getPointerTo(AddressSpace));
+    LoadInst *NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
     propagateMetadata(NewLI, LI);
     Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
   }
@@ -5355,15 +5304,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
     // We might be able to hoist the load.
     if (it->mayReadFromMemory()) {
       LoadInst *LI = dyn_cast<LoadInst>(it);
-      if (!LI)
-        return false;
-      if (!SafePtrs.count(LI->getPointerOperand())) {
-        if (canPredicateLoad(LI->getType(), LI->getPointerOperand())) {
-          MaskedOp.insert(LI);
-          continue;
-        }
+      if (!LI || !SafePtrs.count(LI->getPointerOperand()))
         return false;
-      }
     }
 
    // We don't predicate stores at the moment.
@@ -5371,20 +5313,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
       StoreInst *SI = dyn_cast<StoreInst>(it);
       // We only support predication of stores in basic blocks with one
       // predecessor.
-      if (!SI)
-        return false;
-
-      if (++NumPredStores > NumberOfStoresToPredicate ||
+      if (!SI || ++NumPredStores > NumberOfStoresToPredicate ||
           !SafePtrs.count(SI->getPointerOperand()) ||
-          !SI->getParent()->getSinglePredecessor()) {
-        if (canPredicateStore(SI->getValueOperand()->getType(),
-                              SI->getPointerOperand())) {
-          MaskedOp.insert(SI);
-          --NumPredStores;
-          continue;
-        }
+          !SI->getParent()->getSinglePredecessor())
         return false;
-      }
     }
     if (it->mayThrow())
       return false;
@@ -5448,7 +5380,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
     MaxVectorSize = 1;
   }
 
-  assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"
+  assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
          " into one vector!");
 
   unsigned VF = MaxVectorSize;
@@ -5509,7 +5441,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
     // the vector elements.
     float VectorCost = expectedCost(i) / (float)i;
     DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
-          VectorCost << ".\n");
+          (int)VectorCost << ".\n");
 
     if (VectorCost < Cost) {
       Cost = VectorCost;
       Width = i;

diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
deleted file mode 100644
index 8cb2d63d5f6..00000000000
--- a/test/CodeGen/X86/masked_memop.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
-
-; AVX512-LABEL: test1
-; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
-
-; AVX2-LABEL: test1
-; AVX2: vpmaskmovd 32(%rdi)
-; AVX2: vpmaskmovd (%rdi)
-; AVX2-NOT: blend
-
-define <16 x i32> @test1(<16 x i32> %trigger, i8* %addr) {
-  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
-  %res = call <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32>undef, i32 4, <16 x i1>%mask)
-  ret <16 x i32> %res
-}
-
-; AVX512-LABEL: test2
-; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
-
-; AVX2-LABEL: test2
-; AVX2: vpmaskmovd {{.*}}(%rdi)
-; AVX2: vpmaskmovd {{.*}}(%rdi)
-; AVX2-NOT: blend
-define <16 x i32> @test2(<16 x i32> %trigger, i8* %addr) {
-  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
-  %res = call <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32>zeroinitializer, i32 4, <16 x i1>%mask)
-  ret <16 x i32> %res
-}
-
-; AVX512-LABEL: test3
-; AVX512: vmovdqu32 %zmm1, (%rdi) {%k1}
-
-define void @test3(<16 x i32> %trigger, i8* %addr, <16 x i32> %val) {
-  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
-  call void @llvm.masked.store.v16i32(i8* %addr, <16 x i32>%val, i32 4, <16 x i1>%mask)
-  ret void
-}
-
-; AVX512-LABEL: test4
-; AVX512: vmovups (%rdi), %zmm{{.*{%k[1-7]}}}
-
-; AVX2-LABEL: test4
-; AVX2: vpmaskmovd {{.*}}(%rdi)
-; AVX2: vpmaskmovd {{.*}}(%rdi)
-; 
AVX2: blend
-define <16 x float> @test4(<16 x i32> %trigger, i8* %addr, <16 x float> %dst) {
-  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
-  %res = call <16 x float> @llvm.masked.load.v16f32(i8* %addr, <16 x float>%dst, i32 4, <16 x i1>%mask)
-  ret <16 x float> %res
-}
-
-; AVX512-LABEL: test5
-; AVX512: vmovupd (%rdi), %zmm1 {%k1}
-
-; AVX2-LABEL: test5
-; AVX2: vpmaskmovq
-; AVX2: vblendvpd
-; AVX2: vpmaskmovq
-; AVX2: vblendvpd
-define <8 x double> @test5(<8 x i32> %trigger, i8* %addr, <8 x double> %dst) {
-  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-  %res = call <8 x double> @llvm.masked.load.v8f64(i8* %addr, <8 x double>%dst, i32 4, <8 x i1>%mask)
-  ret <8 x double> %res
-}
-
-declare <16 x i32> @llvm.masked.load.v16i32(i8*, <16 x i32>, i32, <16 x i1>)
-declare void @llvm.masked.store.v16i32(i8*, <16 x i32>, i32, <16 x i1>)
-declare <16 x float> @llvm.masked.load.v16f32(i8*, <16 x float>, i32, <16 x i1>)
-declare void @llvm.masked.store.v16f32(i8*, <16 x float>, i32, <16 x i1>)
-declare <8 x double> @llvm.masked.load.v8f64(i8*, <8 x double>, i32, <8 x i1>)
-declare void @llvm.masked.store.v8f64(i8*, <8 x double>, i32, <8 x i1>)
-
diff --git a/test/Transforms/LoopVectorize/X86/mask1.ll b/test/Transforms/LoopVectorize/X86/mask1.ll
deleted file mode 100644
index 1721955a869..00000000000
--- a/test/Transforms/LoopVectorize/X86/mask1.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
-; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc_linux"
-
-; The source code:
-;
-;void foo(int *A, int *B, int *trigger) {
-;
-;  for (int i=0; i<10000; i++) {
-;    if (trigger[i] < 100) {
-;          A[i] = B[i] + trigger[i];
-;    }
-;  }
-;}
-
-
-;AVX2: llvm.masked.load.v8i32
-;AVX2: llvm.masked.store.v8i32
-;AVX512: llvm.masked.load.v16i32
-;AVX512: llvm.masked.store.v16i32
-;AVX1-NOT: llvm.masked
-
-; Function Attrs: nounwind uwtable
-define void @foo(i32* %A, i32* %B, i32* %trigger) {
-entry:
-  %A.addr = alloca i32*, align 8
-  %B.addr = alloca i32*, align 8
-  %trigger.addr = alloca i32*, align 8
-  %i = alloca i32, align 4
-  store i32* %A, i32** %A.addr, align 8
-  store i32* %B, i32** %B.addr, align 8
-  store i32* %trigger, i32** %trigger.addr, align 8
-  store i32 0, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
-  %cmp = icmp slt i32 %0, 10000
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
-  %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
-  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
-  %cmp1 = icmp slt i32 %3, 100
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
-  %idxprom2 = sext i32 %4 to i64
-  %5 = load i32** %B.addr, align 8
-  %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
-  %6 = load i32* %arrayidx3, align 4
-  %7 = load i32* %i, align 4
-  %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
-  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
-  %add = add nsw i32 %6, %9
-  %10 = load i32* %i, align 4
-  %idxprom6 = sext i32 %10 to i64
-  %11 = load i32** %A.addr, align 8
-  %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
-  store i32 %add, i32* %arrayidx7, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %for.body
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
-  %inc = add nsw i32 %12, 1
-  store i32 %inc, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  ret void
-}
diff --git a/test/Transforms/LoopVectorize/X86/mask2.ll b/test/Transforms/LoopVectorize/X86/mask2.ll
deleted file mode 100644
index 929c88dbe7b..00000000000
--- a/test/Transforms/LoopVectorize/X86/mask2.ll
+++ /dev/null
@@ -1,84 +0,0 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
-; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc_linux"
-
-; The source code:
-;
-;void foo(float *A, float *B, int *trigger) {
-;
-;  for (int i=0; i<10000; i++) {
-;    if (trigger[i] < 100) {
-;          A[i] = B[i] + trigger[i];
-;    }
-;  }
-;}
-
-
-;AVX2: llvm.masked.load.v8f32
-;AVX2: llvm.masked.store.v8f32
-;AVX512: llvm.masked.load.v16f32
-;AVX512: llvm.masked.store.v16f32
-;AVX1-NOT: llvm.masked
-
-; Function Attrs: nounwind uwtable
-define void @foo(float* %A, float* %B, i32* %trigger) {
-entry:
-  %A.addr = alloca float*, align 8
-  %B.addr = alloca float*, align 8
-  %trigger.addr = alloca i32*, align 8
-  %i = alloca i32, align 4
-  store float* %A, float** %A.addr, align 8
-  store float* %B, float** %B.addr, align 8
-  store i32* %trigger, i32** %trigger.addr, align 8
-  store i32 0, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
-  %cmp = icmp slt i32 %0, 10000
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
-  %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
-  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
-  %cmp1 = icmp slt i32 %3, 100
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
-  %idxprom2 = sext i32 %4 to i64
-  %5 = load float** %B.addr, align 8
-  %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2
-  %6 = load float* %arrayidx3, align 4
-  %7 = load i32* %i, align 4
-  %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
-  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
-  %conv = sitofp i32 %9 to float
-  %add = fadd float %6, %conv
-  %10 = load i32* %i, align 4
-  %idxprom6 = sext i32 %10 to i64
-  %11 = load float** %A.addr, align 8
-  %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6
-  store float %add, float* %arrayidx7, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %for.body
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
-  %inc = add nsw i32 %12, 1
-  store i32 %inc, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  ret void
-}
diff --git a/test/Transforms/LoopVectorize/X86/mask3.ll b/test/Transforms/LoopVectorize/X86/mask3.ll
deleted file mode 100644
index 9fb554ef127..00000000000
--- a/test/Transforms/LoopVectorize/X86/mask3.ll
+++ /dev/null
@@ -1,84 +0,0 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
-; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc_linux"
-
-; The source code:
-;
-;void foo(double *A, double *B, int *trigger) {
-;
-;  for (int i=0; i<10000; i++) {
-;    if (trigger[i] < 100) {
-;          A[i] = B[i] + trigger[i];
-;    }
-;  }
-;}
-
-
-;AVX2: llvm.masked.load.v4f64
-;AVX2: llvm.masked.store.v4f64
-;AVX512: llvm.masked.load.v8f64
-;AVX512: llvm.masked.store.v8f64
-;AVX1-NOT: llvm.masked
-
-; Function Attrs: nounwind uwtable
-define void @foo(double* %A, double* %B, i32* %trigger) #0 {
-entry:
-  %A.addr = alloca double*, align 8
-  %B.addr = alloca double*, align 8
-  %trigger.addr = alloca i32*, align 8
-  %i = alloca i32, align 4
-  store double* %A, double** %A.addr, align 8
-  store double* %B, double** %B.addr, align 8
-  store i32* %trigger, i32** %trigger.addr, align 8
-  store i32 0, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
-  %cmp = icmp slt i32 %0, 10000
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
-  %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
-  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
-  %cmp1 = icmp slt i32 %3, 100
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
-  %idxprom2 = sext i32 %4 to i64
-  %5 = load double** %B.addr, align 8
-  %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
-  %6 = load double* %arrayidx3, align 8
-  %7 = load i32* %i, align 4
-  %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
-  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
-  %conv = sitofp i32 %9 to double
-  %add = fadd double %6, %conv
-  %10 = load i32* %i, align 4
-  %idxprom6 = sext i32 %10 to i64
-  %11 = load double** %A.addr, align 8
-  %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
-  store double %add, double* %arrayidx7, align 8
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %for.body
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
-  %inc = add nsw i32 %12, 1
-  store i32 %inc, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  ret void
-}
diff --git a/test/Transforms/LoopVectorize/X86/mask4.ll b/test/Transforms/LoopVectorize/X86/mask4.ll
deleted file mode 100644
index 5cc71f8f6aa..00000000000
--- a/test/Transforms/LoopVectorize/X86/mask4.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
-; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc_linux"
-
-; The source code:
-;
-;void foo(double *A, double *B, int *trigger) {
-;
-;  for (int i=0; i<10000; i++) {
-;    if (trigger[i] < 100) {
-;          A[i] = B[i*2] + trigger[i]; << non-cosecutive access
-;    }
-;  }
-;}
-
-
-;AVX2-NOT: llvm.masked
-;AVX512-NOT: llvm.masked
-;AVX1-NOT: llvm.masked
-
-; Function Attrs: nounwind uwtable
-define void @foo(double* %A, double* %B, i32* %trigger) {
-entry:
-  %A.addr = alloca double*, align 8
-  %B.addr = alloca double*, align 8
-  %trigger.addr = alloca i32*, align 8
-  %i = alloca i32, align 4
-  store double* %A, double** %A.addr, align 8
-  store double* %B, double** %B.addr, align 8
-  store i32* %trigger, i32** %trigger.addr, align 8
-  store i32 0, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
-  %cmp = icmp slt i32 %0, 10000
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
-  %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
-  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
-  %cmp1 = icmp slt i32 %3, 100
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
-  %mul = mul nsw i32 %4, 2
-  %idxprom2 = sext i32 %mul to i64
-  %5 = load double** %B.addr, align 8
-  %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
-  %6 = load double* %arrayidx3, align 8
-  %7 = load i32* %i, align 4
-  %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
-  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
-  %conv = sitofp i32 %9 to double
-  %add = fadd double %6, %conv
-  %10 = load i32* %i, align 4
-  %idxprom6 = sext i32 %10 to i64
-  %11 = load double** %A.addr, align 8
-  %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
-  store double %add, double* %arrayidx7, align 8
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %for.body
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
-  %inc = add nsw i32 %12, 1
-  store i32 %inc, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  ret void
-}
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index b871e067794..87777d46018 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -537,8 +537,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       // variants with iAny types; otherwise, if the intrinsic is not
       // overloaded, all the types can be specified directly.
       assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
-               !TyEl->isSubClassOf("LLVMTruncatedType") &&
-               !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
+               !TyEl->isSubClassOf("LLVMTruncatedType")) ||
              VT == MVT::iAny || VT == MVT::vAny) &&
              "Expected iAny or vAny type");
     } else
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index dcf4b80e4ef..37f6de057da 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -257,8 +257,7 @@ enum IIT_Info {
   IIT_ANYPTR = 26,
   IIT_V1 = 27,
   IIT_VARARG = 28,
-  IIT_HALF_VEC_ARG = 29,
-  IIT_SAME_VEC_WIDTH_ARG = 30
+  IIT_HALF_VEC_ARG = 29
 };
 
 
@@ -306,13 +305,6 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
     Sig.push_back(IIT_TRUNC_ARG);
   else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
     Sig.push_back(IIT_HALF_VEC_ARG);
-  else if (R->isSubClassOf("LLVMVectorSameWidth")) {
-    Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
-    Sig.push_back((Number << 2) | ArgCodes[Number]);
-    MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
-    EncodeFixedValueType(VT, Sig);
-    return;
-  }
   else
     Sig.push_back(IIT_ARG);
   return Sig.push_back((Number << 2) | ArgCodes[Number]);
-- 
2.34.1