Add a late SSEDomainFix pass that twiddles SSE instructions to avoid domain crossings.
author    Jakob Stoklund Olesen <stoklund@2pi.dk>  Tue, 23 Mar 2010 23:14:44 +0000 (23:14 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>  Tue, 23 Mar 2010 23:14:44 +0000 (23:14 +0000)
This is a work in progress. So far, SSE execution domain tables have been added
to X86InstrInfo, and a skeleton pass can be enabled with -sse-domain-fix.
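For example, in a build with assertions enabled, the per-instruction
classification can be inspected with something like

  llc -O2 -mattr=+sse2 -sse-domain-fix -debug-only=sse-domain-fix foo.ll

(the pass only classifies and prints instructions for now; no rewriting
happens yet).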

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99345 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/CMakeLists.txt
lib/Target/X86/SSEDomainFix.cpp [new file with mode: 0644]
lib/Target/X86/X86.h
lib/Target/X86/X86InstrInfo.cpp
lib/Target/X86/X86InstrInfo.h
lib/Target/X86/X86TargetMachine.cpp
lib/Target/X86/X86TargetMachine.h

index 4d3dedf0e5f1e1202271e08dcc34c63c3db3b9ae..22285f1932347666e2e8ec5b82ad5b2226e44004 100644 (file)
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
 tablegen(X86GenSubtarget.inc -gen-subtarget)
 
 set(sources
+  SSEDomainFix.cpp
   X86AsmBackend.cpp
   X86CodeEmitter.cpp
   X86COFFMachineModuleInfo.cpp
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644 (file)
index 0000000..419c675
--- /dev/null
@@ -0,0 +1,536 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions (mov, and, or, xor, ...) are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer CPUs there is extra latency when data
+// is transferred between the integer and floating-point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
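+//
+// For example (not yet implemented by this skeleton, which only classifies
+// instructions): a PXORrr whose operands are produced by ADDPSrr could be
+// rewritten to the equivalent XORPSrr, so the values never leave the
+// packed-single domain.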
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class SSEDomainFixPass : public MachineFunctionPass {
+  static char ID;
+  const X86InstrInfo *TII;
+
+  MachineFunction *MF;
+  MachineBasicBlock *MBB;
+public:
+  SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual const char *getPassName() const {
+    return "SSE execution domain fixup";
+  }
+
+private:
+  void enterBasicBlock(MachineBasicBlock *MBB);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
+  MBB = mbb;
+  DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+
+  MachineBasicBlock *Entry = MF->begin();
+  SmallPtrSet<MachineBasicBlock*, 16> Visited;
+  for (df_ext_iterator<MachineBasicBlock*,
+         SmallPtrSet<MachineBasicBlock*, 16> >
+         DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+       DFI != DFE; ++DFI) {
+    enterBasicBlock(*DFI);
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+        ++I) {
+      MachineInstr *MI = I;
+      const unsigned *equiv = 0;
+      X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
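+      // Debug output: print 'i', 's', 'd', or '-' for the instruction's
+      // domain, and '*' if it has equivalents in the other domains.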
+      DEBUG(dbgs() << "isd-"[domain] << (equiv ? "* " : "  ") << *MI);
+    }
+  }
+  return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+  return new SSEDomainFixPass();
+}
+
+// These are the replaceable instructions. Some of them have _Int variants
+// that are deliberately not included here: we don't want to replace
+// instructions that were selected from intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+  //PackedInt          PackedSingle     PackedDouble
+  { X86::MOVDQAmr,     X86::MOVAPSmr,   X86::MOVAPDmr   },
+  { X86::MOVDQArm,     X86::MOVAPSrm,   X86::MOVAPDrm   },
+  { X86::MOVDQArr,     X86::MOVAPSrr,   X86::MOVAPDrr   },
+  { X86::MOVDQUmr,     X86::MOVUPSmr,   X86::MOVUPDmr   },
+  { X86::MOVDQUrm,     X86::MOVUPSrm,   X86::MOVUPDrm   },
+  { X86::MOVNTDQmr,    X86::MOVNTPSmr,  X86::MOVNTPDmr  },
+  { X86::PANDNrm,      X86::ANDNPSrm,   X86::ANDNPDrm   },
+  { X86::PANDNrr,      X86::ANDNPSrr,   X86::ANDNPDrr   },
+  { X86::PANDrm,       X86::ANDPSrm,    X86::ANDPDrm    },
+  { X86::PANDrr,       X86::ANDPSrr,    X86::ANDPDrr    },
+  { X86::PORrm,        X86::ORPSrm,     X86::ORPDrm     },
+  { X86::PORrr,        X86::ORPSrr,     X86::ORPDrr     },
+  { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
+  { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
+  { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
+  { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
+  { X86::PXORrm,       X86::XORPSrm,    X86::XORPDrm    },
+  { X86::PXORrr,       X86::XORPSrr,    X86::XORPDrr    },
+};
+
+void X86InstrInfo::populateSSEInstrDomainTable() {
+  // Instructions that execute in the packed integer domain.
+  static const unsigned PackedIntInstrs[] = {
+    X86::LDDQUrm,
+    X86::MASKMOVDQU,
+    X86::MASKMOVDQU64,
+    X86::MOVDI2PDIrm,
+    X86::MOVDI2PDIrr,
+    X86::MOVDQUmr_Int,
+    X86::MOVDQUrm_Int,
+    X86::MOVLQ128mr,
+    X86::MOVNTDQArm,
+    X86::MOVNTDQmr_Int,
+    X86::MOVNTDQ_64mr,
+    X86::MOVPDI2DImr,
+    X86::MOVPDI2DIrr,
+    X86::MOVPQI2QImr,
+    X86::MOVPQIto64rr,
+    X86::MOVQI2PQIrm,
+    X86::MOVQxrxr,
+    X86::MOVZDI2PDIrm,
+    X86::MOVZDI2PDIrr,
+    X86::MOVZPQILo2PQIrm,
+    X86::MOVZPQILo2PQIrr,
+    X86::MOVZQI2PQIrm,
+    X86::MOVZQI2PQIrr,
+    X86::MPSADBWrmi,
+    X86::MPSADBWrri,
+    X86::PABSBrm128,
+    X86::PABSBrr128,
+    X86::PABSDrm128,
+    X86::PABSDrr128,
+    X86::PABSWrm128,
+    X86::PABSWrr128,
+    X86::PACKSSDWrm,
+    X86::PACKSSDWrr,
+    X86::PACKSSWBrm,
+    X86::PACKSSWBrr,
+    X86::PACKUSDWrm,
+    X86::PACKUSDWrr,
+    X86::PACKUSWBrm,
+    X86::PACKUSWBrr,
+    X86::PADDBrm,
+    X86::PADDBrr,
+    X86::PADDDrm,
+    X86::PADDDrr,
+    X86::PADDQrm,
+    X86::PADDQrr,
+    X86::PADDSBrm,
+    X86::PADDSBrr,
+    X86::PADDSWrm,
+    X86::PADDSWrr,
+    X86::PADDUSBrm,
+    X86::PADDUSBrr,
+    X86::PADDUSWrm,
+    X86::PADDUSWrr,
+    X86::PADDWrm,
+    X86::PADDWrr,
+    X86::PALIGNR128rm,
+    X86::PALIGNR128rr,
+    X86::PAVGBrm,
+    X86::PAVGBrr,
+    X86::PAVGWrm,
+    X86::PAVGWrr,
+    X86::PBLENDVBrm0,
+    X86::PBLENDVBrr0,
+    X86::PBLENDWrmi,
+    X86::PBLENDWrri,
+    X86::PCMPEQBrm,
+    X86::PCMPEQBrr,
+    X86::PCMPEQDrm,
+    X86::PCMPEQDrr,
+    X86::PCMPEQQrm,
+    X86::PCMPEQQrr,
+    X86::PCMPEQWrm,
+    X86::PCMPEQWrr,
+    X86::PCMPESTRIArm,
+    X86::PCMPESTRIArr,
+    X86::PCMPESTRICrm,
+    X86::PCMPESTRICrr,
+    X86::PCMPESTRIOrm,
+    X86::PCMPESTRIOrr,
+    X86::PCMPESTRIrm,
+    X86::PCMPESTRIrr,
+    X86::PCMPESTRISrm,
+    X86::PCMPESTRISrr,
+    X86::PCMPESTRIZrm,
+    X86::PCMPESTRIZrr,
+    X86::PCMPESTRM128MEM,
+    X86::PCMPESTRM128REG,
+    X86::PCMPESTRM128rm,
+    X86::PCMPESTRM128rr,
+    X86::PCMPGTBrm,
+    X86::PCMPGTBrr,
+    X86::PCMPGTDrm,
+    X86::PCMPGTDrr,
+    X86::PCMPGTQrm,
+    X86::PCMPGTQrr,
+    X86::PCMPGTWrm,
+    X86::PCMPGTWrr,
+    X86::PCMPISTRIArm,
+    X86::PCMPISTRIArr,
+    X86::PCMPISTRICrm,
+    X86::PCMPISTRICrr,
+    X86::PCMPISTRIOrm,
+    X86::PCMPISTRIOrr,
+    X86::PCMPISTRIrm,
+    X86::PCMPISTRIrr,
+    X86::PCMPISTRISrm,
+    X86::PCMPISTRISrr,
+    X86::PCMPISTRIZrm,
+    X86::PCMPISTRIZrr,
+    X86::PCMPISTRM128MEM,
+    X86::PCMPISTRM128REG,
+    X86::PCMPISTRM128rm,
+    X86::PCMPISTRM128rr,
+    X86::PEXTRBmr,
+    X86::PEXTRBrr,
+    X86::PEXTRDmr,
+    X86::PEXTRDrr,
+    X86::PEXTRQmr,
+    X86::PEXTRQrr,
+    X86::PEXTRWmr,
+    X86::PEXTRWri,
+    X86::PHADDDrm128,
+    X86::PHADDDrr128,
+    X86::PHADDSWrm128,
+    X86::PHADDSWrr128,
+    X86::PHADDWrm128,
+    X86::PHADDWrr128,
+    X86::PHMINPOSUWrm128,
+    X86::PHMINPOSUWrr128,
+    X86::PHSUBDrm128,
+    X86::PHSUBDrr128,
+    X86::PHSUBSWrm128,
+    X86::PHSUBSWrr128,
+    X86::PHSUBWrm128,
+    X86::PHSUBWrr128,
+    X86::PINSRBrm,
+    X86::PINSRBrr,
+    X86::PINSRDrm,
+    X86::PINSRDrr,
+    X86::PINSRQrm,
+    X86::PINSRQrr,
+    X86::PINSRWrmi,
+    X86::PINSRWrri,
+    X86::PMADDUBSWrm128,
+    X86::PMADDUBSWrr128,
+    X86::PMADDWDrm,
+    X86::PMADDWDrr,
+    X86::PMAXSBrm,
+    X86::PMAXSBrr,
+    X86::PMAXSDrm,
+    X86::PMAXSDrr,
+    X86::PMAXSWrm,
+    X86::PMAXSWrr,
+    X86::PMAXUBrm,
+    X86::PMAXUBrr,
+    X86::PMAXUDrm,
+    X86::PMAXUDrr,
+    X86::PMAXUWrm,
+    X86::PMAXUWrr,
+    X86::PMINSBrm,
+    X86::PMINSBrr,
+    X86::PMINSDrm,
+    X86::PMINSDrr,
+    X86::PMINSWrm,
+    X86::PMINSWrr,
+    X86::PMINUBrm,
+    X86::PMINUBrr,
+    X86::PMINUDrm,
+    X86::PMINUDrr,
+    X86::PMINUWrm,
+    X86::PMINUWrr,
+    X86::PMOVSXBDrm,
+    X86::PMOVSXBDrr,
+    X86::PMOVSXBQrm,
+    X86::PMOVSXBQrr,
+    X86::PMOVSXBWrm,
+    X86::PMOVSXBWrr,
+    X86::PMOVSXDQrm,
+    X86::PMOVSXDQrr,
+    X86::PMOVSXWDrm,
+    X86::PMOVSXWDrr,
+    X86::PMOVSXWQrm,
+    X86::PMOVSXWQrr,
+    X86::PMOVZXBDrm,
+    X86::PMOVZXBDrr,
+    X86::PMOVZXBQrm,
+    X86::PMOVZXBQrr,
+    X86::PMOVZXBWrm,
+    X86::PMOVZXBWrr,
+    X86::PMOVZXDQrm,
+    X86::PMOVZXDQrr,
+    X86::PMOVZXWDrm,
+    X86::PMOVZXWDrr,
+    X86::PMOVZXWQrm,
+    X86::PMOVZXWQrr,
+    X86::PMULDQrm,
+    X86::PMULDQrr,
+    X86::PMULHRSWrm128,
+    X86::PMULHRSWrr128,
+    X86::PMULHUWrm,
+    X86::PMULHUWrr,
+    X86::PMULHWrm,
+    X86::PMULHWrr,
+    X86::PMULLDrm,
+    X86::PMULLDrm_int,
+    X86::PMULLDrr,
+    X86::PMULLDrr_int,
+    X86::PMULLWrm,
+    X86::PMULLWrr,
+    X86::PMULUDQrm,
+    X86::PMULUDQrr,
+    X86::PSADBWrm,
+    X86::PSADBWrr,
+    X86::PSHUFBrm128,
+    X86::PSHUFBrr128,
+    X86::PSHUFHWmi,
+    X86::PSHUFHWri,
+    X86::PSHUFLWmi,
+    X86::PSHUFLWri,
+    X86::PSIGNBrm128,
+    X86::PSIGNBrr128,
+    X86::PSIGNDrm128,
+    X86::PSIGNDrr128,
+    X86::PSIGNWrm128,
+    X86::PSIGNWrr128,
+    X86::PSLLDQri,
+    X86::PSLLDri,
+    X86::PSLLDrm,
+    X86::PSLLDrr,
+    X86::PSLLQri,
+    X86::PSLLQrm,
+    X86::PSLLQrr,
+    X86::PSLLWri,
+    X86::PSLLWrm,
+    X86::PSLLWrr,
+    X86::PSRADri,
+    X86::PSRADrm,
+    X86::PSRADrr,
+    X86::PSRAWri,
+    X86::PSRAWrm,
+    X86::PSRAWrr,
+    X86::PSRLDQri,
+    X86::PSRLDri,
+    X86::PSRLDrm,
+    X86::PSRLDrr,
+    X86::PSRLQri,
+    X86::PSRLQrm,
+    X86::PSRLQrr,
+    X86::PSRLWri,
+    X86::PSRLWrm,
+    X86::PSRLWrr,
+    X86::PSUBBrm,
+    X86::PSUBBrr,
+    X86::PSUBDrm,
+    X86::PSUBDrr,
+    X86::PSUBQrm,
+    X86::PSUBQrr,
+    X86::PSUBSBrm,
+    X86::PSUBSBrr,
+    X86::PSUBSWrm,
+    X86::PSUBSWrr,
+    X86::PSUBUSBrm,
+    X86::PSUBUSBrr,
+    X86::PSUBUSWrm,
+    X86::PSUBUSWrr,
+    X86::PSUBWrm,
+    X86::PSUBWrr,
+    X86::PUNPCKHBWrm,
+    X86::PUNPCKHBWrr,
+    X86::PUNPCKHWDrm,
+    X86::PUNPCKHWDrr,
+    X86::PUNPCKLBWrm,
+    X86::PUNPCKLBWrr,
+    X86::PUNPCKLWDrm,
+    X86::PUNPCKLWDrr,
+  };
+
+  // Instructions that execute in the packed single domain.
+  static const unsigned PackedSingleInstrs[] = {
+    X86::ADDPSrm,
+    X86::ADDPSrr,
+    X86::ADDSUBPSrm,
+    X86::ADDSUBPSrr,
+    X86::BLENDPSrmi,
+    X86::BLENDPSrri,
+    X86::BLENDVPSrm0,
+    X86::BLENDVPSrr0,
+    X86::CMPPSrmi,
+    X86::CMPPSrri,
+    X86::DIVPSrm,
+    X86::DIVPSrr,
+    X86::DPPSrmi,
+    X86::DPPSrri,
+    X86::EXTRACTPSmr,
+    X86::EXTRACTPSrr,
+    X86::HADDPSrm,
+    X86::HADDPSrr,
+    X86::HSUBPSrm,
+    X86::HSUBPSrr,
+    X86::INSERTPSrm,
+    X86::INSERTPSrr,
+    X86::MAXPSrm,
+    X86::MAXPSrm_Int,
+    X86::MAXPSrr,
+    X86::MAXPSrr_Int,
+    X86::MINPSrm,
+    X86::MINPSrm_Int,
+    X86::MINPSrr,
+    X86::MINPSrr_Int,
+    X86::MOVHLPSrr,
+    X86::MOVHPSmr,
+    X86::MOVHPSrm,
+    X86::MOVLHPSrr,
+    X86::MOVLPSmr,
+    X86::MOVLPSrm,
+    X86::MOVMSKPSrr,
+    X86::MOVNTPSmr_Int,
+    X86::MOVSHDUPrm,
+    X86::MOVSHDUPrr,
+    X86::MOVSLDUPrm,
+    X86::MOVSLDUPrr,
+    X86::MOVUPSmr_Int,
+    X86::MOVUPSrm_Int,
+    X86::MULPSrm,
+    X86::MULPSrr,
+    X86::RCPPSm,
+    X86::RCPPSm_Int,
+    X86::RCPPSr,
+    X86::RCPPSr_Int,
+    X86::ROUNDPSm_Int,
+    X86::ROUNDPSr_Int,
+    X86::RSQRTPSm,
+    X86::RSQRTPSm_Int,
+    X86::RSQRTPSr,
+    X86::RSQRTPSr_Int,
+    X86::SQRTPSm,
+    X86::SQRTPSm_Int,
+    X86::SQRTPSr,
+    X86::SQRTPSr_Int,
+    X86::SUBPSrm,
+    X86::SUBPSrr,
+  };
+
+  // Instructions that execute in the packed double domain.
+  static const unsigned PackedDoubleInstrs[] = {
+    X86::ADDPDrm,
+    X86::ADDPDrr,
+    X86::ADDSUBPDrm,
+    X86::ADDSUBPDrr,
+    X86::BLENDPDrmi,
+    X86::BLENDPDrri,
+    X86::BLENDVPDrm0,
+    X86::BLENDVPDrr0,
+    X86::CMPPDrmi,
+    X86::CMPPDrri,
+    X86::DIVPDrm,
+    X86::DIVPDrr,
+    X86::DPPDrmi,
+    X86::DPPDrri,
+    X86::HADDPDrm,
+    X86::HADDPDrr,
+    X86::HSUBPDrm,
+    X86::HSUBPDrr,
+    X86::MAXPDrm,
+    X86::MAXPDrm_Int,
+    X86::MAXPDrr,
+    X86::MAXPDrr_Int,
+    X86::MINPDrm,
+    X86::MINPDrm_Int,
+    X86::MINPDrr,
+    X86::MINPDrr_Int,
+    X86::MOVHPDmr,
+    X86::MOVHPDrm,
+    X86::MOVLPDmr,
+    X86::MOVLPDrm,
+    X86::MOVMSKPDrr,
+    X86::MOVNTPDmr_Int,
+    X86::MOVUPDmr_Int,
+    X86::MOVUPDrm_Int,
+    X86::MULPDrm,
+    X86::MULPDrr,
+    X86::ROUNDPDm_Int,
+    X86::ROUNDPDr_Int,
+    X86::SQRTPDm,
+    X86::SQRTPDm_Int,
+    X86::SQRTPDr,
+    X86::SQRTPDr_Int,
+    X86::SUBPDrm,
+    X86::SUBPDrr,
+  };
+
+  // Add plain domain entries for instructions that are fixed to a single
+  // domain. Their table value is just the domain, so value/4 is zero.
+  for (unsigned i = 0, e = array_lengthof(PackedIntInstrs); i != e; ++i)
+    SSEInstrDomainTable.insert(std::make_pair(PackedIntInstrs[i],
+                                              PackedInt));
+  for (unsigned i = 0, e = array_lengthof(PackedSingleInstrs); i != e; ++i)
+    SSEInstrDomainTable.insert(std::make_pair(PackedSingleInstrs[i],
+                                              PackedSingle));
+  for (unsigned i = 0, e = array_lengthof(PackedDoubleInstrs); i != e; ++i)
+    SSEInstrDomainTable.insert(std::make_pair(PackedDoubleInstrs[i],
+                                              PackedDouble));
+
+  // Add entries for the replaceable instructions. Each one is stored as
+  // column + 4 * (row + 1), so the low two bits give the domain and value/4
+  // recovers 1 + the row of equivalent opcodes in ReplaceableInstrs.
+  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+    for (unsigned c = 0; c != 3; ++c)
+      SSEInstrDomainTable.insert(std::make_pair(ReplaceableInstrs[i][c],
+                                                c + 4*(i+1)));
+}
+
+X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
+                                                 const unsigned *&equiv) const {
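+  // Table values encode the domain in the low two bits. When value/4 is
+  // non-zero, it is 1 + the row index of the equivalent opcodes in
+  // ReplaceableInstrs (see populateSSEInstrDomainTable).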
+  DenseMap<unsigned,unsigned>::const_iterator i =
+    SSEInstrDomainTable.find(MI->getOpcode());
+  if (i == SSEInstrDomainTable.end())
+    return NotSSEDomain;
+  unsigned value = i->second;
+  if (value/4)
+    equiv = ReplaceableInstrs[value/4 - 1];
+  else
+    equiv = 0;
+  return SSEDomain(value & 3);
+}
index c753cf2a530f98c0c3e91fc01301b7986375777f..9be38a4b56a988d98b1377106cb1c3d66a0fdfd7 100644 (file)
@@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
 ///
 FunctionPass *createX86FloatingPointStackifierPass();
 
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
 /// createX87FPRegKillInserterPass - This function returns a pass which
 /// inserts FP_REG_KILL instructions where needed.
 ///
index 2323f5790ab0f6d1534600b828e9a3bf4bcdeaaf..71a4dae6a5f4ad1515112fd07263a9f9426b10f4 100644 (file)
@@ -665,6 +665,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   // Remove ambiguous entries.
   assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
+
+  if (TM.getSubtarget<X86Subtarget>().hasSSE2())
+    populateSSEInstrDomainTable();
 }
 
 bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
index 5111719a2094949934d874f2a34cf71b53dd57cc..c3dbae9a5ef046a50ce55bb3904853b31034cc16 100644 (file)
@@ -486,6 +486,9 @@ class X86InstrInfo : public TargetInstrInfoImpl {
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
   DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+
+  /// SSEInstrDomainTable - Map SSE opcodes to execution domain info.
+  DenseMap<unsigned, unsigned> SSEInstrDomainTable;
   
 public:
   explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,14 @@ public:
   ///
   unsigned getGlobalBaseReg(MachineFunction *MF) const;
 
+  /// Some SSE instructions come in variants for three domains.
+  enum SSEDomain { PackedInt, PackedSingle, PackedDouble, NotSSEDomain };
+
+  /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for
+  /// unknown instructions. If the instruction has equivalents in the other
+  /// domains, equiv is set to point at a list of opcodes indexed by domain.
+  SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const;
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
@@ -732,6 +743,9 @@ private:
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
                       int &FrameIndex) const;
+
+  // Implemented in SSEDomainFix.cpp
+  void populateSSEInstrDomainTable();
 };
 
 } // End llvm namespace
index f13e6f35256466e2a1747a3f2079b0ddd038a883..06a481de258dd71dd0df3434a6ab70b56183e1b8 100644 (file)
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
+static cl::opt<bool>
+SSEDomainFix("sse-domain-fix",
+               cl::desc("Enable fixing of SSE execution domain"),
+               cl::init(false), cl::Hidden);
+
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
@@ -169,6 +175,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
   return true;  // -print-machineinstr should print after this.
 }
 
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+    PM.add(createSSEDomainFixPass());
+    return true;
+  }
+  return false;
+}
+
 bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel,
                                       JITCodeEmitter &JCE) {
index 2bb54544d408b8ee2d88aaa525c58f103fe35767..ae7b5b29af1456948882bc2cbe41e3bcb1590641 100644 (file)
@@ -66,6 +66,7 @@ public:
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
                               JITCodeEmitter &JCE);
 };