/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+ // What is the sequence number of this mem node.
+ // Lowest mem operand in the DAG starts at zero.
+ unsigned SequenceNum;
+ };
+
+ /// This is a helper function for MergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store.
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumElem,
+ bool IsConstantSrc, bool UseVector);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
}
};
-/// Holds a pointer to an LSBaseSDNode as well as information on where it
-/// is located in a sequence of memory operations connected by a chain.
-struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
- // Ptr to the mem node.
- LSBaseSDNode *MemNode;
- // Offset from the base ptr.
- int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
-};
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
+ unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned EarliestNodeUsed = 0;
+
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ if (IsConstantSrc) {
+ // A vector store with a constant source implies that the constant is
+ // zero; we only handle merging stores of constant zeros because the zero
+ // can be materialized without a load.
+ // It may be beneficial to loosen this restriction to allow non-zero
+ // store merging.
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All of the operands of a BUILD_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt <<= ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change it's chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+
+ return true;
+}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
return false;
// Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
+ // are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !IsLoadSrc)
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
// Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
+ if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
- // Make sure we have something to merge.
- if (NumElem < 2)
- return false;
-
- unsigned EarliestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
- }
-
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
- SDLoc DL(StoreNodes[0].MemNode);
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ }
- SDValue StoredVal;
- if (UseVector) {
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecEltSrc) {
+ unsigned NumElem = 0;
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+ // This restriction could be loosened.
+ // Bail out if any stored values are not elements extracted from a vector.
+ // It should be possible to handle mixed sources, but load sources need
+ // more careful handling (see the block of code below that handles
+ // consecutive loads).
+ if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
- StoredVal = DAG.getConstant(0, Ty);
- } else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
-
- // Construct a single integer constant which is made of the smaller
- // constant inputs.
- bool IsLE = TLI.isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- SDValue Val = St->getValue();
- StoreInt<<=ElementSizeBytes*8;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt|=C->getAPIntValue().zext(StoreBW);
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
- } else {
- llvm_unreachable("Invalid constant element type");
- }
- }
-
- // Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
- }
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- false, false,
- FirstInChain->getAlignment());
-
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ NumElem = i + 1;
}
- return true;
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ false, true);
}
// Below we handle the case of multiple consecutive stores that
; <label>:14
ret void
}
+
+; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
+define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
+ %vecext0 = extractelement <8 x float> %v, i32 0
+ %vecext1 = extractelement <8 x float> %v, i32 1
+ %vecext2 = extractelement <8 x float> %v, i32 2
+ %vecext3 = extractelement <8 x float> %v, i32 3
+ %vecext4 = extractelement <8 x float> %v, i32 4
+ %vecext5 = extractelement <8 x float> %v, i32 5
+ %vecext6 = extractelement <8 x float> %v, i32 6
+ %vecext7 = extractelement <8 x float> %v, i32 7
+ %arrayidx1 = getelementptr inbounds float* %ptr, i64 1
+ %arrayidx2 = getelementptr inbounds float* %ptr, i64 2
+ %arrayidx3 = getelementptr inbounds float* %ptr, i64 3
+ %arrayidx4 = getelementptr inbounds float* %ptr, i64 4
+ %arrayidx5 = getelementptr inbounds float* %ptr, i64 5
+ %arrayidx6 = getelementptr inbounds float* %ptr, i64 6
+ %arrayidx7 = getelementptr inbounds float* %ptr, i64 7
+ store float %vecext0, float* %ptr, align 4
+ store float %vecext1, float* %arrayidx1, align 4
+ store float %vecext2, float* %arrayidx2, align 4
+ store float %vecext3, float* %arrayidx3, align 4
+ store float %vecext4, float* %arrayidx4, align 4
+ store float %vecext5, float* %arrayidx5, align 4
+ store float %vecext6, float* %arrayidx6, align 4
+ store float %vecext7, float* %arrayidx7, align 4
+ ret void
+
+; CHECK-LABEL: merge_vec_element_store
+; CHECK: vmovups
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; This is a minimized test based on real code that was failing.
+; We could merge stores (and loads) like this...
+
+define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
+ %idx0 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 0
+ %idx1 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 1
+ %idx4 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 4
+ %idx5 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 5
+
+ %a0 = load i64* %idx0, align 8
+ store i64 %a0, i64* %idx4, align 8
+
+ %b = bitcast i64* %idx1 to <2 x i64>*
+ %v = load <2 x i64>* %b, align 8
+ %a1 = extractelement <2 x i64> %v, i32 0
+ store i64 %a1, i64* %idx5, align 8
+ ret void
+
+; CHECK-LABEL: merge_vec_element_and_scalar_load
+; CHECK: movq (%rdi), %rax
+; CHECK-NEXT: movq %rax, 32(%rdi)
+; CHECK-NEXT: movq 8(%rdi), %rax
+; CHECK-NEXT: movq %rax, 40(%rdi)
+; CHECK-NEXT: retq
+}