+// Reorder commutative operations in alternate shuffle if the resulting vectors
+// are consecutive loads. This would allow us to vectorize the tree.
+// If we have something like-
+// load a[0] - load b[0]
+// load b[1] + load a[1]
+// load a[2] - load b[2]
+// load a[3] + load b[3]
+// Reordering the second load b[1] load a[1] would allow us to vectorize this
+// code.
+void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Push left and right operands of binary operation into Left and Right
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Left.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Right.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ // Reorder if we have a commutative operation and consecutive access
+ // are on either side of the alternate instructions.
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ }
+}
+
+
+// Return true if I should be commuted before adding it's left and right
+// operands to the arrays Left and Right.
+//
+// The vectorizer is trying to either have all elements one side being
+// instruction with the same opcode to enable further vectorization, or having
+// a splat to lower the vectorizing cost.
+static bool shouldReorderOperands(int i, Instruction &I,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+ Value *VLeft = I.getOperand(0);
+ Value *VRight = I.getOperand(1);
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // Sort two opcodes. In the code below we try to preserve the ability to use
+ // broadcast of values instead of individual inserts.
+ // vl1 = load
+ // vl2 = phi
+ // vr1 = load
+ // vr2 = vr2
+ // = vl1 x vr1
+ // = vl2 x vr2
+ // If we just sorted according to opcode we would leave the first line in
+ // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+ // = vl1 x vr1
+ // = vr2 x vl2
+ // Because vr2 and vr1 are from the same load we loose the opportunity of a
+ // broadcast for the packed right side in the backend: we have [vr1, vl2]
+ // instead of [vr1, vr2=vr1].
+ if (ILeft && IRight) {
+ if (ILeft->getOpcode() > IRight->getOpcode() &&
+ Right[i - 1] != IRight) {
+ // Try not to destroy a broad cast for no apparent benefit.
+ return true;
+ } else if (ILeft->getOpcode() == IRight->getOpcode() &&
+ Right[i - 1] == ILeft) {
+ // Try preserve broadcasts.
+ return true;
+ } else if (ILeft->getOpcode() == IRight->getOpcode() &&
+ Left[i - 1] == IRight) {
+ // Try preserve broadcasts.
+ return true;
+ }
+ return false;
+ }
+ // One opcode, put the instruction on the right.
+ if (ILeft) {
+ return true;
+ }
+ return false;
+}
+
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ SmallVector<Value *, 16> OrigLeft, OrigRight;
+
+ if (VL.size()) {
+ // Peel the first iteration out of the loop since there's nothing
+ // interesting to do anyway and it simplifies the checks
+ auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
+ auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+ OrigLeft.push_back(VLeft);
+ OrigRight.push_back(VRight);
+ if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
+ // Favor having instruction to the right. FIXME: why?
+ std::swap(VLeft, VRight);
+ Left.push_back(VLeft);
+ Right.push_back(VRight);
+ }
+
+ // Keep track if we have instructions with all the same opcode on one side.
+ bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
+ bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
+
+ for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+
+ Value *VLeft = I->getOperand(0);
+ Value *VRight = I->getOperand(1);
+ OrigLeft.push_back(VLeft);
+ OrigRight.push_back(VRight);
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // Check whether all operands on one side have the same opcode. In this case
+ // we want to preserve the original order and not make things worse by
+ // reordering.
+ if (AllSameOpcodeLeft && ILeft) {
+ if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
+ if (PLeft->getOpcode() != ILeft->getOpcode())
+ AllSameOpcodeLeft = false;
+ } else
+ AllSameOpcodeLeft = false;
+ }
+ if (AllSameOpcodeRight && IRight) {
+ if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
+ if (PRight->getOpcode() != IRight->getOpcode())
+ AllSameOpcodeRight = false;
+ } else
+ AllSameOpcodeRight = false;
+ }
+
+
+ // Commute to favor either a splat or maximizing having the same opcodes on
+ // one side.
+ if (shouldReorderOperands(i, *I, Left, Right)) {
+ Left.push_back(I->getOperand(1));
+ Right.push_back(I->getOperand(0));
+ } else {
+ Left.push_back(I->getOperand(0));
+ Right.push_back(I->getOperand(1));
+ }
+ }
+
+ bool LeftBroadcast = isSplat(Left);
+ bool RightBroadcast = isSplat(Right);
+
+ // If operands end up being broadcast return this operand order.
+ if (LeftBroadcast || RightBroadcast)
+ return;
+
+ // Don't reorder if the operands where good to begin.
+ if (AllSameOpcodeRight || AllSameOpcodeLeft) {
+ Left = OrigLeft;
+ Right = OrigRight;
+ }
+
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Finally check if we can get longer vectorizable chain by reordering
+ // without breaking the good operand order detected above.
+ // E.g. If we have something like-
+ // load a[0] load b[0]
+ // load b[1] load a[1]
+ // load a[2] load b[2]
+ // load a[3] load b[3]
+ // Reordering the second load b[1] load a[1] would allow us to vectorize
+ // this code and we still retain AllSameOpcode property.
+ // FIXME: This load reordering might break AllSameOpcode in some rare cases
+ // such as-
+ // add a[0],c[0] load b[0]
+ // add a[1],c[2] load b[1]
+ // b[2] load b[2]
+ // add a[3],c[3] load b[3]
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ // else unchanged
+ }
+}
+