/// \returns true if the memory operations A and B are consecutive.
bool isConsecutiveAccess(Value *A, Value *B);
- /// For consecutive loads (+(+ v0, v1)(+ v2, v3)), Left had v0 and v2
- /// while Right had v1 and v3, which prevented bundling them into
- /// a vector of loads. Rorder them so that Left now has v0 and v1
- /// while Right has v2 and v3 enabling their bundling into a vector.
- void reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right);
-
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
- reorderIfConsecutiveLoads (Left, Right);
buildTree_rec(Left, Depth + 1);
buildTree_rec(Right, Depth + 1);
return;
return X == PtrSCEVB;
}
-void BoUpSLP::reorderIfConsecutiveLoads(SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
- for (unsigned i = 0, e = Left.size(); i < e - 1; ++i) {
- if (!isa<LoadInst>(Left[i]) || !isa<LoadInst>(Right[i]))
- return;
- if (!(isConsecutiveAccess(Left[i], Right[i]) &&
- isConsecutiveAccess(Right[i], Left[i + 1])))
- continue;
- else
- std::swap(Left[i + 1], Right[i]);
- }
-}
-
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
BasicBlock::iterator NextInst = VL0;
case Instruction::Or:
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
- if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
- reorderIfConsecutiveLoads(LHSVL, RHSVL);
- } else
+ else
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+++ /dev/null
-; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64--linux-gnu"
-
-; float hadd (float *a) {
-; return (a[0] + a[1]) + (a[2] + a[3]);
-; }
-
-; CHECK-LABEL: @hadd
-; CHECK: load <2 x float>*
-; CHECK: fadd <2 x float>
-; CHECK: extractelement <2 x float>
-
-define float @hadd(float* nocapture readonly %a) {
-entry:
- %0 = load float* %a, align 4
- %arrayidx1 = getelementptr inbounds float* %a, i64 1
- %1 = load float* %arrayidx1, align 4
- %add = fadd float %0, %1
- %arrayidx2 = getelementptr inbounds float* %a, i64 2
- %2 = load float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds float* %a, i64 3
- %3 = load float* %arrayidx3, align 4
- %add4 = fadd float %2, %3
- %add5 = fadd float %add, %add4
- ret float %add5
-}