1 //===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the SSEDomainFix pass.
12 // Some SSE instructions like mov, and, or, xor are available in different
13 // variants for different operand types. These variant instructions are
14 // equivalent, but on Nehalem and newer CPUs there is extra latency when
15 // transferring data between the integer and floating-point domains.
17 // This pass changes the variant instructions to minimize domain crossings.
19 //===----------------------------------------------------------------------===//
21 #define DEBUG_TYPE "sse-domain-fix"
22 #include "X86InstrInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/ADT/DepthFirstIterator.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
// Machine function pass that reports itself as "SSE execution domain fixup".
// NOTE(review): this listing is incomplete -- access specifiers, the `ID`
// member referenced by the constructor, and the closing braces are not
// visible here.
31 class SSEDomainFixPass : public MachineFunctionPass {
// X86 instruction info; assigned at the start of runOnMachineFunction and
// queried there through GetSSEDomain().
33 const X86InstrInfo *TII;
// Basic block member read by enterBasicBlock() (debug output) and by the
// per-instruction loop in runOnMachineFunction().
36 MachineBasicBlock *MBB;
// Registers the pass with the base class using the address of `ID`.
38 SSEDomainFixPass() : MachineFunctionPass(&ID) {}
// Forwards to the base-class implementation.
// NOTE(review): any additional AnalysisUsage settings would be on lines that
// are not visible in this listing.
40 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
42 MachineFunctionPass::getAnalysisUsage(AU);
// Pass entry point; defined out of line below.
45 virtual bool runOnMachineFunction(MachineFunction &MF);
// Human-readable pass name.
47 virtual const char *getPassName() const {
48 return "SSE execution domain fixup";
// Per-block hook called once for each block visited by runOnMachineFunction().
52 void enterBasicBlock(MachineBasicBlock *MBB);
// Called for each basic block before its instructions are scanned. The visible
// body only emits a debug line naming the block held in the MBB member.
// NOTE(review): the `mbb` parameter is not referenced on any visible line;
// presumably it is stored into the MBB member on a line missing from this
// listing -- confirm against the full file.
56 void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
58 DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
// Pass entry point: visits the function's basic blocks depth-first from the
// entry block and, for every instruction, queries its SSE domain and prints a
// debug line.
// NOTE(review): `MF` and `MI` are used below but their declarations or
// assignments are not visible in this listing (the parameter is named `mf`);
// the loop headers are truncated and the return statement is not shown.
61 bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
// Cache the target's instruction info as the X86-specific type.
63 TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
// Depth-first walk starting at the first block; `Visited` is the external
// visited set shared by the begin and end iterators.
65 MachineBasicBlock *Entry = MF->begin();
66 SmallPtrSet<MachineBasicBlock*, 16> Visited;
67 for (df_ext_iterator<MachineBasicBlock*,
68 SmallPtrSet<MachineBasicBlock*, 16> >
69 DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
71 enterBasicBlock(*DFI);
// Iterate over the instructions of the block held in the MBB member.
72 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
// `equiv` is an out-parameter of GetSSEDomain; it starts out null.
75 const unsigned *equiv = 0;
76 X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
// Debug output: one character picked from "isd-" by the numeric domain value,
// then '*' if `equiv` is non-null, then the instruction itself.
77 DEBUG(dbgs() << "isd-"[domain] << (equiv ? "* " : " ") << *MI);
// Factory for the pass: returns a newly heap-allocated SSEDomainFixPass as a
// FunctionPass pointer.
83 FunctionPass *llvm::createSSEDomainFixPass() {
84 return new SSEDomainFixPass();
87 // These are the replaceable instructions. Some of these have _Int variants
88 // that we don't include here. We don't want to replace instructions selected
89 // by intrinsics.
// Table of interchangeable opcodes. Each row holds three opcodes, one per
// column in the order given by the header comment below.
// populateSSEInstrDomainTable() registers every entry, and GetSSEDomain()
// returns a pointer to a whole row through its `equiv` out-parameter.
// Row order is significant: the row index is encoded into the values stored
// in SSEInstrDomainTable ("row number + 1") and decoded again in
// GetSSEDomain().
90 static const unsigned ReplaceableInstrs[][3] = {
91 //PackedInt PackedSingle PackedDouble
92 { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
93 { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
94 { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
95 { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
96 { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
97 { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
98 { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
99 { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
100 { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
101 { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
102 { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
103 { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
104 { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
105 { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
106 { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
107 { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
108 { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
109 { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
// Fills SSEInstrDomainTable, a map keyed by opcode, from three local opcode
// lists (one per domain) and from the ReplaceableInstrs table.
// NOTE(review): most array entries, the array terminators, and the second
// argument of each std::make_pair call are not visible in this listing.
112 void X86InstrInfo::populateSSEInstrDomainTable() {
113 // Instructions that execute in the packed integer domain.
114 static const unsigned PackedIntInstrs[] = {
134 X86::MOVZPQILo2PQIrm,
135 X86::MOVZPQILo2PQIrr,
200 X86::PCMPESTRM128MEM,
201 X86::PCMPESTRM128REG,
224 X86::PCMPISTRM128MEM,
225 X86::PCMPISTRM128REG,
242 X86::PHMINPOSUWrm128,
243 X86::PHMINPOSUWrr128,
392 // Instructions that execute in the packed single domain.
393 static const unsigned PackedSingleInstrs[] = {
458 // Instructions that execute in the packed double domain.
459 static const unsigned PackedDoubleInstrs[] = {
506 // Add non-negative entries for forcing instructions.
// One insertion per opcode in each of the three lists above.
507 for (unsigned i = 0, e = array_lengthof(PackedIntInstrs); i != e; ++i)
508 SSEInstrDomainTable.insert(std::make_pair(PackedIntInstrs[i],
510 for (unsigned i = 0, e = array_lengthof(PackedSingleInstrs); i != e; ++i)
511 SSEInstrDomainTable.insert(std::make_pair(PackedSingleInstrs[i],
513 for (unsigned i = 0, e = array_lengthof(PackedDoubleInstrs); i != e; ++i)
514 SSEInstrDomainTable.insert(std::make_pair(PackedDoubleInstrs[i],
517 // Add row number + 1 for replaceable instructions.
// Every opcode in every column of ReplaceableInstrs is inserted.
// NOTE(review): the stored value is not visible here; GetSSEDomain() decodes
// the row as value/4 - 1 and the domain as value & 3, so it presumably packs
// the column index together with 4 * (row + 1) -- confirm.
518 for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
519 for (unsigned c = 0; c != 3; ++c)
520 SSEInstrDomainTable.insert(std::make_pair(ReplaceableInstrs[i][c],
// Looks up MI's opcode in SSEInstrDomainTable and returns its SSE domain.
// `equiv` is an out-parameter: on the visible path it is pointed at the
// ReplaceableInstrs row that contains the opcode.
// NOTE(review): the statement executed when the opcode is not in the table,
// and any condition guarding the `equiv` assignment, are on lines not visible
// in this listing.
524 X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
525 const unsigned *&equiv) const {
526 DenseMap<unsigned,unsigned>::const_iterator i =
527 SSEInstrDomainTable.find(MI->getOpcode());
528 if (i == SSEInstrDomainTable.end())
530 unsigned value = i->second;
// value/4 is "row number + 1" for replaceable instructions, so subtracting one
// gives the index of the row in ReplaceableInstrs.
532 equiv = ReplaceableInstrs[value/4 - 1];
// The low two bits of the stored value hold the domain.
535 return SSEDomain(value & 3);