+// Reorder commutative operations in alternate shuffle if the resulting vectors
+// are consecutive loads. This would allow us to vectorize the tree.
+// If we have something like-
+// load a[0] - load b[0]
+// load b[1] + load a[1]
+// load a[2] - load b[2]
+// load a[3] + load b[3]
+// Reordering the second load b[1] load a[1] would allow us to vectorize this
+// code.
+void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Push left and right operands of binary operation into Left and Right
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Left.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Right.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ // Reorder if we have a commutative operation and consecutive access
+ // are on either side of the alternate instructions.
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ }
+}
+
+// Return true if I should be commuted before adding it's left and right
+// operands to the arrays Left and Right.
+//
+// The vectorizer is trying to either have all elements one side being
+// instruction with the same opcode to enable further vectorization, or having
+// a splat to lower the vectorizing cost.
+static bool shouldReorderOperands(int i, Instruction &I,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ bool AllSameOpcodeLeft,
+ bool AllSameOpcodeRight, bool SplatLeft,
+ bool SplatRight) {
+ Value *VLeft = I.getOperand(0);
+ Value *VRight = I.getOperand(1);
+ // If we have "SplatRight", try to see if commuting is needed to preserve it.
+ if (SplatRight) {
+ if (VRight == Right[i - 1])
+ // Preserve SplatRight
+ return false;
+ if (VLeft == Right[i - 1]) {
+ // Commuting would preserve SplatRight, but we don't want to break
+ // SplatLeft either, i.e. preserve the original order if possible.
+ // (FIXME: why do we care?)
+ if (SplatLeft && VLeft == Left[i - 1])
+ return false;
+ return true;
+ }
+ }
+ // Symmetrically handle Right side.
+ if (SplatLeft) {
+ if (VLeft == Left[i - 1])
+ // Preserve SplatLeft
+ return false;
+ if (VRight == Left[i - 1])
+ return true;
+ }
+
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // If we have "AllSameOpcodeRight", try to see if the left operands preserves
+ // it and not the right, in this case we want to commute.
+ if (AllSameOpcodeRight) {
+ unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
+ if (IRight && RightPrevOpcode == IRight->getOpcode())
+ // Do not commute, a match on the right preserves AllSameOpcodeRight
+ return false;
+ if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
+ // We have a match and may want to commute, but first check if there is
+ // not also a match on the existing operands on the Left to preserve
+ // AllSameOpcodeLeft, i.e. preserve the original order if possible.
+ // (FIXME: why do we care?)
+ if (AllSameOpcodeLeft && ILeft &&
+ cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
+ return false;
+ return true;
+ }
+ }
+ // Symmetrically handle Left side.
+ if (AllSameOpcodeLeft) {
+ unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
+ if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
+ return false;
+ if (IRight && LeftPrevOpcode == IRight->getOpcode())
+ return true;
+ }
+ return false;
+}
+
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ if (VL.size()) {
+ // Peel the first iteration out of the loop since there's nothing
+ // interesting to do anyway and it simplifies the checks in the loop.
+ auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
+ auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+ if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
+ // Favor having instruction to the right. FIXME: why?
+ std::swap(VLeft, VRight);
+ Left.push_back(VLeft);
+ Right.push_back(VRight);
+ }
+
+ // Keep track if we have instructions with all the same opcode on one side.
+ bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
+ bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
+ // Keep track if we have one side with all the same value (broadcast).
+ bool SplatLeft = true;
+ bool SplatRight = true;
+
+ for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ assert(I->isCommutative() && "Can only process commutative instruction");
+ // Commute to favor either a splat or maximizing having the same opcodes on
+ // one side.
+ if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
+ AllSameOpcodeRight, SplatLeft, SplatRight)) {
+ Left.push_back(I->getOperand(1));
+ Right.push_back(I->getOperand(0));
+ } else {
+ Left.push_back(I->getOperand(0));
+ Right.push_back(I->getOperand(1));
+ }
+ // Update Splat* and AllSameOpcode* after the insertion.
+ SplatRight = SplatRight && (Right[i - 1] == Right[i]);
+ SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
+ AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
+ (cast<Instruction>(Left[i - 1])->getOpcode() ==
+ cast<Instruction>(Left[i])->getOpcode());
+ AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
+ (cast<Instruction>(Right[i - 1])->getOpcode() ==
+ cast<Instruction>(Right[i])->getOpcode());
+ }
+
+ // If one operand end up being broadcast, return this operand order.
+ if (SplatRight || SplatLeft)
+ return;
+
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Finally check if we can get longer vectorizable chain by reordering
+ // without breaking the good operand order detected above.
+ // E.g. If we have something like-
+ // load a[0] load b[0]
+ // load b[1] load a[1]
+ // load a[2] load b[2]
+ // load a[3] load b[3]
+ // Reordering the second load b[1] load a[1] would allow us to vectorize
+ // this code and we still retain AllSameOpcode property.
+ // FIXME: This load reordering might break AllSameOpcode in some rare cases
+ // such as-
+ // add a[0],c[0] load b[0]
+ // add a[1],c[2] load b[1]
+ // b[2] load b[2]
+ // add a[3],c[3] load b[3]
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ // else unchanged
+ }
+}
+