1 //===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the SSEDomainFix pass.
12 // Some SSE instructions like mov, and, or, xor are available in different
13 // variants for different operand types. These variant instructions are
14 // equivalent, but on Nehalem and newer cpus there is extra latency
15 // transferring data between integer and floating point domains.
17 // This pass changes the variant instructions to minimize domain crossings.
19 //===----------------------------------------------------------------------===//
21 #define DEBUG_TYPE "sse-domain-fix"
22 #include "X86InstrInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/ADT/DepthFirstIterator.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
33 /// Allocate objects from a pool, allow objects to be recycled, and provide a
34 /// way of deleting everything.
35 template<typename T, unsigned PageSize = 64>
37 std::vector<T*> Pages, Avail;
39 ~PoolAllocator() { Clear(); }
43 T *p = new T[PageSize];
45 Avail.reserve(PageSize);
46 for (unsigned n = 0; n != PageSize; ++n)
54 // Allow object to be reallocated. It won't be reconstructed.
60 // Destroy all objects, make sure there are no external pointers to them.
63 while (!Pages.empty()) {
64 delete[] Pages.back();
70 /// A DomainValue is a bit like LiveIntervals' ValNo, but it laso keeps track
71 /// of execution domains.
73 /// An open DomainValue represents a set of instructions that can still switch
74 /// execution domain. Multiple registers may refer to the same open
75 /// DomainValue - they will eventually be collapsed to the same execution
78 /// A collapsed DomainValue represents a single register that has been forced
79 /// into one of more execution domains. There is a separate collapsed
80 /// DomainValue for each register, but it may contain multiple execution
81 /// domains. A register value is initially created in a single execution
82 /// domain, but if we were forced to pay the penalty of a domain crossing, we
83 /// keep track of the fact the the register is now available in multiple
86 // Basic reference counting.
89 // Available domains. For an open DomainValue, it is the still possible
90 // domains for collapsing. For a collapsed DomainValue it is the domains where
91 // the register is available for free.
94 // Position of the last defining instruction.
97 // Twiddleable instructions using or defining these registers.
98 SmallVector<MachineInstr*, 8> Instrs;
100 // Collapsed DomainValue have no instructions to twiddle - it simply keeps
101 // track of the domains where the registers are already available.
102 bool collapsed() const { return Instrs.empty(); }
104 // Is any domain in mask available?
105 bool compat(unsigned mask) const {
109 // Mark domain as available.
110 void add(unsigned domain) {
111 Mask |= 1u << domain;
114 // First domain available in mask.
115 unsigned firstDomain() const {
116 return CountTrailingZeros_32(Mask);
119 DomainValue() { clear(); }
122 Refs = Mask = Dist = 0;
127 static const unsigned NumRegs = 16;
129 class SSEDomainFixPass : public MachineFunctionPass {
131 PoolAllocator<DomainValue> Pool;
134 const X86InstrInfo *TII;
135 const TargetRegisterInfo *TRI;
136 MachineBasicBlock *MBB;
137 DomainValue **LiveRegs;
138 typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
143 SSEDomainFixPass() : MachineFunctionPass(&ID) {}
145 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
146 AU.setPreservesAll();
147 MachineFunctionPass::getAnalysisUsage(AU);
150 virtual bool runOnMachineFunction(MachineFunction &MF);
152 virtual const char *getPassName() const {
153 return "SSE execution domain fixup";
158 int RegIndex(unsigned Reg);
160 // LiveRegs manipulations.
161 void SetLiveReg(int rx, DomainValue *DV);
163 void Force(int rx, unsigned domain);
164 void Collapse(DomainValue *dv, unsigned domain);
165 bool Merge(DomainValue *A, DomainValue *B);
167 void enterBasicBlock();
168 void visitGenericInstr(MachineInstr*);
169 void visitSoftInstr(MachineInstr*, unsigned mask);
170 void visitHardInstr(MachineInstr*, unsigned domain);
175 char SSEDomainFixPass::ID = 0;
177 /// Translate TRI register number to an index into our smaller tables of
178 /// interesting registers. Return -1 for boring registers.
179 int SSEDomainFixPass::RegIndex(unsigned reg) {
180 // Registers are sorted lexicographically.
181 // We just need them to be consecutive, ordering doesn't matter.
182 assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
184 return reg < NumRegs ? reg : -1;
187 /// Set LiveRegs[rx] = dv, updating reference counts.
188 void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
189 assert(unsigned(rx) < NumRegs && "Invalid index");
191 LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs);
193 if (LiveRegs[rx] == dv)
196 assert(LiveRegs[rx]->Refs && "Bad refcount");
197 if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
203 // Kill register rx, recycle or collapse any DomainValue.
204 void SSEDomainFixPass::Kill(int rx) {
205 assert(unsigned(rx) < NumRegs && "Invalid index");
206 if (!LiveRegs || !LiveRegs[rx]) return;
208 // Before killing the last reference to an open DomainValue, collapse it to
209 // the first available domain.
210 if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
211 Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain());
216 /// Force register rx into domain.
217 void SSEDomainFixPass::Force(int rx, unsigned domain) {
218 assert(unsigned(rx) < NumRegs && "Invalid index");
220 if (LiveRegs && (dv = LiveRegs[rx])) {
224 Collapse(dv, domain);
226 // Set up basic collapsed DomainValue.
234 /// Collapse open DomainValue into given domain. If there are multiple
235 /// registers using dv, they each get a unique collapsed DomainValue.
236 void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
237 assert(dv->compat(1u << domain) && "Cannot collapse");
239 // Collapse all the instructions.
240 while (!dv->Instrs.empty()) {
241 MachineInstr *mi = dv->Instrs.back();
242 TII->SetSSEDomain(mi, domain);
243 dv->Instrs.pop_back();
245 dv->Mask = 1u << domain;
247 // If there are multiple users, give them new, unique DomainValues.
248 if (LiveRegs && dv->Refs > 1) {
249 for (unsigned rx = 0; rx != NumRegs; ++rx)
250 if (LiveRegs[rx] == dv) {
251 DomainValue *dv2 = Pool.Alloc();
252 dv2->Dist = Distance;
259 /// Merge - All instructions and registers in B are moved to A, and B is
261 bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
262 assert(!A->collapsed() && "Cannot merge into collapsed");
263 assert(!B->collapsed() && "Cannot merge from collapsed");
266 if (!A->compat(B->Mask))
269 A->Dist = std::max(A->Dist, B->Dist);
270 A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
271 for (unsigned rx = 0; rx != NumRegs; ++rx)
272 if (LiveRegs[rx] == B)
277 void SSEDomainFixPass::enterBasicBlock() {
278 // Try to coalesce live-out registers from predecessors.
279 for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(),
280 e = MBB->livein_end(); i != e; ++i) {
281 int rx = RegIndex(*i);
282 if (rx < 0) continue;
283 for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
284 pe = MBB->pred_end(); pi != pe; ++pi) {
285 LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
286 if (fi == LiveOuts.end()) continue;
287 DomainValue *pdv = fi->second[rx];
289 if (!LiveRegs || !LiveRegs[rx])
292 // We have a live DomainValue from more than one predecessor.
293 if (LiveRegs[rx]->collapsed()) {
294 // We are already collapsed, but predecessor is not. Force him.
295 if (!pdv->collapsed())
296 Collapse(pdv, LiveRegs[rx]->firstDomain());
298 // Currently open, merge in predecessor.
299 if (!pdv->collapsed())
300 Merge(LiveRegs[rx], pdv);
302 Collapse(LiveRegs[rx], pdv->firstDomain());
309 // A hard instruction only works in one domain. All input registers will be
310 // forced into that domain.
311 void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
312 // Collapse all uses.
313 for (unsigned i = mi->getDesc().getNumDefs(),
314 e = mi->getDesc().getNumOperands(); i != e; ++i) {
315 MachineOperand &mo = mi->getOperand(i);
316 if (!mo.isReg()) continue;
317 int rx = RegIndex(mo.getReg());
318 if (rx < 0) continue;
322 // Kill all defs and force them.
323 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
324 MachineOperand &mo = mi->getOperand(i);
325 if (!mo.isReg()) continue;
326 int rx = RegIndex(mo.getReg());
327 if (rx < 0) continue;
333 // A soft instruction can be changed to work in other domains given by mask.
334 void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
335 // Scan the explicit use operands for incoming domains.
336 unsigned collmask = mask;
337 SmallVector<int, 4> used;
339 for (unsigned i = mi->getDesc().getNumDefs(),
340 e = mi->getDesc().getNumOperands(); i != e; ++i) {
341 MachineOperand &mo = mi->getOperand(i);
342 if (!mo.isReg()) continue;
343 int rx = RegIndex(mo.getReg());
344 if (rx < 0) continue;
345 if (DomainValue *dv = LiveRegs[rx]) {
346 // Is it possible to use this collapsed register for free?
347 if (dv->collapsed()) {
348 if (unsigned m = collmask & dv->Mask)
350 } else if (dv->compat(collmask))
357 // If the collapsed operands force a single domain, propagate the collapse.
358 if (isPowerOf2_32(collmask)) {
359 unsigned domain = CountTrailingZeros_32(collmask);
360 TII->SetSSEDomain(mi, domain);
361 visitHardInstr(mi, domain);
365 // Kill off any remaining uses that don't match collmask, and build a list of
366 // incoming DomainValue that we want to merge.
367 SmallVector<DomainValue*,4> doms;
368 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
370 DomainValue *dv = LiveRegs[rx];
371 // This useless DomainValue could have been missed above.
372 if (!dv->compat(collmask)) {
376 // sorted, uniqued insert.
377 bool inserted = false;
378 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
379 i != e && !inserted; ++i) {
382 else if (dv->Dist < (*i)->Dist) {
391 // doms are now sorted in order of appearance. Try to merge them all, giving
392 // priority to the latest ones.
394 while (!doms.empty()) {
397 else if (!Merge(dv, doms.back()))
398 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
399 if (LiveRegs[*i] == doms.back())
404 // dv is the DomainValue we are going to use for this instruction.
409 dv->Instrs.push_back(mi);
411 // Finally set all defs and non-collapsed uses to dv.
412 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
413 MachineOperand &mo = mi->getOperand(i);
414 if (!mo.isReg()) continue;
415 int rx = RegIndex(mo.getReg());
416 if (rx < 0) continue;
417 if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
424 void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
425 // Process explicit defs, kill any XMM registers redefined.
426 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
427 MachineOperand &mo = mi->getOperand(i);
428 if (!mo.isReg()) continue;
429 int rx = RegIndex(mo.getReg());
430 if (rx < 0) continue;
435 bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
437 TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
438 TRI = MF->getTarget().getRegisterInfo();
442 assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
444 // If no XMM registers are used in the function, we can skip it completely.
445 bool anyregs = false;
446 for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
447 E = X86::VR128RegClass.end(); I != E; ++I)
448 if (MF->getRegInfo().isPhysRegUsed(*I)) {
452 if (!anyregs) return false;
454 MachineBasicBlock *Entry = MF->begin();
455 SmallPtrSet<MachineBasicBlock*, 16> Visited;
456 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
457 DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
461 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
463 MachineInstr *mi = I;
464 if (mi->isDebugValue()) continue;
466 std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
469 visitSoftInstr(mi, domp.second);
471 visitHardInstr(mi, domp.first);
473 visitGenericInstr(mi);
476 // Save live registers at end of MBB - used by enterBasicBlock().
478 LiveOuts.insert(std::make_pair(MBB, LiveRegs));
482 // Clear the LiveOuts vectors. Should we also collapse any remaining
484 for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
493 FunctionPass *llvm::createSSEDomainFixPass() {
494 return new SSEDomainFixPass();