-//===-- BuildShuffleTable.cpp - Perfect Shuffle Generator -----------------===//
+//===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
+#include <cassert>
+#include <cstdlib>
+#include <iomanip>
#include <iostream>
#include <vector>
-
struct Operator;
// Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes
-// an element from that value of the input vectors. A value of 8 means the
+// an element from that value of the input vectors. A value of 8 means the
// entry is undefined.
// Mask manipulation functions.
-static inline unsigned short MakeMask(unsigned V0, unsigned V1,
+static inline unsigned short MakeMask(unsigned V0, unsigned V1,
unsigned V2, unsigned V3) {
return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
}
/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
/// refer to the LHS only (for when one argument value is passed into the same
/// function twice).
+#if 0
static unsigned short getLHSOnlyMask(unsigned short Mask) {
return Mask & 0xBBBB; // Keep only LHS and Undefs.
}
+#endif
/// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
/// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
static unsigned getCompressedMask(unsigned short Mask) {
- return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
+ return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3);
}
unsigned Cost; // Number of instrs used to generate this value.
Operator *Op; // The Operation used to generate this value.
unsigned short Arg0, Arg1; // Input operands for this value.
-
+
ShuffleVal() : Cost(1000000) {}
};
unsigned short ShuffleMask;
unsigned short OpNum;
const char *Name;
-
- Operator(unsigned short shufflemask, const char *name)
- : ShuffleMask(shufflemask), Name(name) {
- OpNum = TheOperators.size();
+ unsigned Cost;
+
+ Operator(unsigned short shufflemask, const char *name, unsigned opnum,
+ unsigned cost = 1)
+ : ShuffleMask(shufflemask), OpNum(opnum), Name(name), Cost(cost) {
TheOperators.push_back(this);
}
~Operator() {
assert(TheOperators.back() == this);
TheOperators.pop_back();
}
-
+
bool isOnlyLHSOperator() const {
return isOnlyLHSMask(ShuffleMask);
}
-
+
const char *getName() const { return Name; }
-
+ unsigned getCost() const { return Cost; }
+
unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
// Extract the elements from LHSMask and RHSMask, as appropriate.
unsigned Result = 0;
std::cerr << "t" << ValNo;
PrintMask(ThisOp, std::cerr);
std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
-
+
if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
break;
}
}
-
+
if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
std::cerr << ", ";
if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
return Count;
}
-static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
+static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
unsigned &NumVals) {
if (ShufTab[Elt].Cost == 0) return;
// If this value has already been evaluated, it is free. FIXME: match undefs.
for (unsigned i = 0, e = NumVals; i != e; ++i)
if (Vals[i] == Elt) return;
-
+
// Otherwise, get the operands of the value, then add it.
unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
if (ShufTab[Arg0].Cost)
EvaluateOps(Arg0, Vals, NumVals);
if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
EvaluateOps(Arg1, Vals, NumVals);
-
+
Vals[NumVals++] = Elt;
}
ShufTab[0x4567].Cost = 0;
ShufTab[0x4567].Op = 0;
ShufTab[0x4567].Arg0 = 0x4567;
-
+
// Seed the first-level of shuffles, shuffles whose inputs are the input to
// the vectorshuffle operation.
bool MadeChange = true;
++OpCount;
std::cerr << "Starting iteration #" << OpCount << " with "
<< getNumEntered() << " entries established.\n";
-
+
// Scan the table for two reasons: First, compute the maximum cost of any
// operation left in the table. Second, make sure that values with undefs
// have the cheapest alternative that they match.
if (!isValidMask(i)) continue;
if (ShufTab[i].Cost > MaxCost)
MaxCost = ShufTab[i].Cost;
-
+
// If this value has an undef, make it be computed the cheapest possible
// way of any of the things that it matches.
if (hasUndefElements(i)) {
UndefIdx = 3;
else
abort();
-
+
unsigned MinVal = i;
unsigned MinCost = ShufTab[i].Cost;
-
+
// Scan the 8 entries.
for (unsigned j = 0; j != 8; ++j) {
unsigned NewElt = setMaskElt(i, UndefIdx, j);
MinVal = NewElt;
}
}
-
+
// If we found something cheaper than what was here before, use it.
if (i != MinVal) {
MadeChange = true;
ShufTab[i] = ShufTab[MinVal];
}
- }
+ }
}
-
+
for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
if (!isValidMask(LHS)) continue;
if (ShufTab[LHS].Cost > 1000) continue;
// we already have, don't consider it.
if (ShufTab[LHS].Cost + 1 >= MaxCost)
continue;
-
+
for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
Operator *Op = TheOperators[opnum];
- unsigned short Mask = Op->ShuffleMask;
// Evaluate op(LHS,LHS)
unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
- unsigned Cost = ShufTab[LHS].Cost + 1;
+ unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
if (Cost < ShufTab[ResultMask].Cost) {
ShufTab[ResultMask].Cost = Cost;
ShufTab[ResultMask].Op = Op;
ShufTab[ResultMask].Arg1 = LHS;
MadeChange = true;
}
-
+
// If this is a two input instruction, include the op(x,y) cases. If
// this is a one input instruction, skip this.
if (Op->isOnlyLHSOperator()) continue;
-
+
for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
if (!isValidMask(RHS)) continue;
if (ShufTab[RHS].Cost > 1000) continue;
-
+
// If nothing involving this operand could possibly be cheaper than
// what we already have, don't consider it.
if (ShufTab[RHS].Cost + 1 >= MaxCost)
continue;
-
+
// Evaluate op(LHS,RHS)
unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
continue;
-
+
// Figure out the cost to evaluate this, knowing that CSE's only need
// to be evaluated once.
unsigned short Vals[30];
EvaluateOps(LHS, Vals, NumVals);
EvaluateOps(RHS, Vals, NumVals);
- unsigned Cost = NumVals + 1;
+ unsigned Cost = NumVals + Op->getCost();
if (Cost < ShufTab[ResultMask].Cost) {
ShufTab[ResultMask].Cost = Cost;
ShufTab[ResultMask].Op = Op;
}
}
}
-
+
std::cerr << "Finished Table has " << getNumEntered()
<< " entries established.\n";
-
+
unsigned CostArray[10] = { 0 };
// Compute a cost histogram.
else
++CostArray[ShufTab[i].Cost];
}
-
+
for (unsigned i = 0; i != 9; ++i)
if (CostArray[i])
std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
if (CostArray[9])
std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
-
-
+
+
// Build up the table to emit.
std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
- std::cout << "static const unsigned InstrTab[6561+1] = {\n";
-
+ std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";
+
for (unsigned i = 0; i != 0x8889; ++i) {
if (!isValidMask(i)) continue;
-
+
// CostSat - The cost of this operation saturated to two bits.
unsigned CostSat = ShufTab[i].Cost;
- if (CostSat > 3) CostSat = 3;
-
+ if (CostSat > 4) CostSat = 4;
+ if (CostSat == 0) CostSat = 1;
+ --CostSat; // Cost is now between 0-3.
+
unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
assert(OpNum < 16 && "Too few bits to encode operation!");
-
+
unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
-
+
// Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
// LHS, and 13 bits of RHS = 32 bits.
- unsigned Val = (CostSat << 30) | (OpNum << 27) | (LHS << 13) | RHS;
+ unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
- std::cout << " " << Val << "U,\t// ";
+ std::cout << " " << std::setw(10) << Val << "U, // ";
PrintMask(i, std::cout);
std::cout << ": Cost " << ShufTab[i].Cost;
std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
}
}
std::cout << "\n";
- }
+ }
std::cout << " 0\n};\n";
if (0) {
if (ShufTab[i].Cost < 1000) {
PrintMask(i, std::cerr);
std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
-
+
unsigned short Vals[30];
unsigned NumVals = 0;
EvaluateOps(i, Vals, NumVals);
}
+#ifdef GENERATE_ALTIVEC
///===---------------------------------------------------------------------===//
/// The altivec instruction definitions. This is the altivec-specific part of
/// this file.
///===---------------------------------------------------------------------===//
+// Note that the opcode numbers here must match those in the PPC backend.
+enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+};
+
struct vmrghw : public Operator {
- vmrghw() : Operator(0x0415, "vmrghw") {}
+ vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
} the_vmrghw;
struct vmrglw : public Operator {
- vmrglw() : Operator(0x2637, "vmrglw") {}
+ vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
} the_vmrglw;
template<unsigned Elt>
struct vspltisw : public Operator {
- vspltisw(const char *N) : Operator(MakeMask(Elt, Elt, Elt, Elt), N) {}
+ vspltisw(const char *N, unsigned Opc)
+ : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
};
-vspltisw<0> the_vspltisw0("vspltisw0");
-vspltisw<1> the_vspltisw1("vspltisw1");
-vspltisw<2> the_vspltisw2("vspltisw2");
-vspltisw<3> the_vspltisw3("vspltisw3");
+vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
+vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
+vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
+vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);
template<unsigned N>
struct vsldoi : public Operator {
- vsldoi(const char *n) : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), n){
+ vsldoi(const char *Name, unsigned Opc)
+ : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
}
};
-vsldoi<1> the_vsldoi1("vsldoi4");
-vsldoi<2> the_vsldoi2("vsldoi8");
-vsldoi<3> the_vsldoi3("vsldoi12");
+vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
+vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
+vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
+
+#endif
+
+#define GENERATE_NEON
+
+#ifdef GENERATE_NEON
+enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+};
+
+struct vrev : public Operator {
+ vrev() : Operator(0x1032, "vrev", OP_VREV) {}
+} the_vrev;
+
+template<unsigned Elt>
+struct vdup : public Operator {
+ vdup(const char *N, unsigned Opc)
+ : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
+};
+
+vdup<0> the_vdup0("vdup0", OP_VDUP0);
+vdup<1> the_vdup1("vdup1", OP_VDUP1);
+vdup<2> the_vdup2("vdup2", OP_VDUP2);
+vdup<3> the_vdup3("vdup3", OP_VDUP3);
+
+template<unsigned N>
+struct vext : public Operator {
+ vext(const char *Name, unsigned Opc)
+ : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
+ }
+};
+
+vext<1> the_vext1("vext1", OP_VEXT1);
+vext<2> the_vext2("vext2", OP_VEXT2);
+vext<3> the_vext3("vext3", OP_VEXT3);
+
+struct vuzpl : public Operator {
+ vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {}
+} the_vuzpl;
+
+struct vuzpr : public Operator {
+ vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {}
+} the_vuzpr;
+
+struct vzipl : public Operator {
+ vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {}
+} the_vzipl;
+
+struct vzipr : public Operator {
+ vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {}
+} the_vzipr;
+
+struct vtrnl : public Operator {
+ vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {}
+} the_vtrnl;
+
+struct vtrnr : public Operator {
+ vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {}
+} the_vtrnr;
+#endif