From bc5cbb8be9d935240be5a57a8f37c5258a9d0563 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 12 Nov 2008 22:55:05 +0000 Subject: [PATCH] Move the code that inserts X87 FP_REG_KILL instructions from a special-purpose hook to a new pass. Also, add check to see if any x87 virtual registers are used, to avoid doing any work in the common case that no x87 code is needed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@59190 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAGISel.h | 1 - lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3 - lib/Target/X86/CMakeLists.txt | 1 + lib/Target/X86/X86.h | 5 + lib/Target/X86/X86FloatingPointRegKill.cpp | 139 ++++++++++++++++++ lib/Target/X86/X86ISelDAGToDAG.cpp | 80 ---------- lib/Target/X86/X86TargetMachine.cpp | 3 + 7 files changed, 148 insertions(+), 84 deletions(-) create mode 100644 lib/Target/X86/X86FloatingPointRegKill.cpp diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index d3e785b5fe7..cd58267994a 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -64,7 +64,6 @@ public: virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {} virtual void InstructionSelect() = 0; - virtual void InstructionSelectPostProcessing() {} void SelectRootInit() { DAGSize = CurDAG->AssignTopologicalOrder(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3114d1b9bb3..23822fa7fb1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -840,9 +840,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, void SelectionDAGISel::FinishBasicBlock() { - // Perform target specific isel post processing. 
- InstructionSelectPostProcessing(); - DOUT << "Target-post-processed machine code:\n"; DEBUG(BB->dump()); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 3c4f37f8104..d98299049a2 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(X86CodeGen X86CodeEmitter.cpp X86ELFWriterInfo.cpp X86FloatingPoint.cpp + X86FloatingPointRegKill.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index a77542d5fe9..0c2d3b36a2f 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -35,6 +35,11 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, bool Fast); /// FunctionPass *createX86FloatingPointStackifierPass(); +/// createX87FPRegKillInserterPass - This function returns a pass which +/// inserts FP_REG_KILL instructions where needed. +/// +FunctionPass *createX87FPRegKillInserterPass(); + /// createX86CodePrinterPass - Returns a pass that prints the X86 /// assembly code for a MachineFunction to the given output stream, /// using the given target machine description. diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp new file mode 100644 index 00000000000..f8afc4c1c44 --- /dev/null +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -0,0 +1,139 @@ +//===-- X86FloatingPointRegKill.cpp - FP_REG_KILL inserter ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which inserts FP_REG_KILL instructions. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-codegen" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added"); + +namespace { + struct VISIBILITY_HIDDEN FPRegKiller : public MachineFunctionPass { + static char ID; + FPRegKiller() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; } + }; + char FPRegKiller::ID = 0; +} + +FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); } + +bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { + // If we are emitting FP stack code, scan the basic block to determine if this + // block defines any FP values. If so, put an FP_REG_KILL instruction before + // the terminator of the block. + + // Note that FP stack instructions are used in all modes for long double, + // so we always need to do this check. + // Also note that it's possible for an FP stack register to be live across + // an instruction that produces multiple basic blocks (SSE CMOV) so we + // must check all the generated basic blocks. + + // Scan all of the machine instructions in these MBBs, checking for FP + // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.) 
+ + // Fast-path: If nothing is using the x87 registers, we don't need to do + // any scanning. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() && + MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() && + MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty()) + return false; + + bool Changed = false; + const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); + MachineFunction::iterator MBBI = MF.begin(); + MachineFunction::iterator EndMBB = MF.end(); + for (; MBBI != EndMBB; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + + // If this block returns, ignore it. We don't want to insert an FP_REG_KILL + // before the return. + if (!MBB->empty()) { + MachineBasicBlock::iterator EndI = MBB->end(); + --EndI; + if (EndI->getDesc().isReturn()) + continue; + } + + bool ContainsFPCode = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + !ContainsFPCode && I != E; ++I) { + if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) { + const TargetRegisterClass *clas; + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + if (I->getOperand(op).isReg() && I->getOperand(op).isDef() && + TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && + ((clas = MRI.getRegClass(I->getOperand(op).getReg())) == + X86::RFP32RegisterClass || + clas == X86::RFP64RegisterClass || + clas == X86::RFP80RegisterClass)) { + ContainsFPCode = true; + break; + } + } + } + } + // Check PHI nodes in successor blocks. These PHI's will be lowered to have + // a copy of the input value in this block. In SSE mode, we only care about + // 80-bit values. + if (!ContainsFPCode) { + // Final check, check LLVM BB's that are successors to the LLVM BB + // corresponding to MBB for FP PHI nodes. 
+ const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const PHINode *PN; + for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); + !ContainsFPCode && SI != E; ++SI) { + for (BasicBlock::const_iterator II = SI->begin(); + (PN = dyn_cast<PHINode>(II)); ++II) { + if (PN->getType()==Type::X86_FP80Ty || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) { + ContainsFPCode = true; + break; + } + } + } + } + // Finally, if we found any FP code, emit the FP_REG_KILL instruction. + if (ContainsFPCode) { + BuildMI(*MBB, MBBI->getFirstTerminator(), + MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL)); + ++NumFPKill; + Changed = true; + } + } + + return Changed; +} diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b543e23c59b..4bb9c235292 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -41,7 +41,6 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added"); STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -140,10 +139,6 @@ namespace { /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. virtual void InstructionSelect(); - /// InstructionSelectPostProcessing - Post processing of selected and - /// scheduled basic blocks. - virtual void InstructionSelectPostProcessing(); - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const; @@ -663,81 +658,6 @@ void X86DAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -void X86DAGToDAGISel::InstructionSelectPostProcessing() { - // If we are emitting FP stack code, scan the basic block to determine if this - // block defines any FP values. 
If so, put an FP_REG_KILL instruction before - // the terminator of the block. - - // Note that FP stack instructions are used in all modes for long double, - // so we always need to do this check. - // Also note that it's possible for an FP stack register to be live across - // an instruction that produces multiple basic blocks (SSE CMOV) so we - // must check all the generated basic blocks. - - // Scan all of the machine instructions in these MBBs, checking for FP - // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.) - MachineFunction::iterator MBBI = CurBB; - MachineFunction::iterator EndMBB = BB; ++EndMBB; - for (; MBBI != EndMBB; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - - // If this block returns, ignore it. We don't want to insert an FP_REG_KILL - // before the return. - if (!MBB->empty()) { - MachineBasicBlock::iterator EndI = MBB->end(); - --EndI; - if (EndI->getDesc().isReturn()) - continue; - } - - bool ContainsFPCode = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - !ContainsFPCode && I != E; ++I) { - if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) { - const TargetRegisterClass *clas; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (I->getOperand(op).isReg() && I->getOperand(op).isDef() && - TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && - ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) == - X86::RFP32RegisterClass || - clas == X86::RFP64RegisterClass || - clas == X86::RFP80RegisterClass)) { - ContainsFPCode = true; - break; - } - } - } - } - // Check PHI nodes in successor blocks. These PHI's will be lowered to have - // a copy of the input value in this block. In SSE mode, we only care about - // 80-bit values. - if (!ContainsFPCode) { - // Final check, check LLVM BB's that are successors to the LLVM BB - // corresponding to BB for FP PHI nodes. 
- const BasicBlock *LLVMBB = BB->getBasicBlock(); - const PHINode *PN; - for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); - !ContainsFPCode && SI != E; ++SI) { - for (BasicBlock::const_iterator II = SI->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - if (PN->getType()==Type::X86_FP80Ty || - (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) || - (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) { - ContainsFPCode = true; - break; - } - } - } - } - // Finally, if we found any FP code, emit the FP_REG_KILL instruction. - if (ContainsFPCode) { - BuildMI(*MBB, MBBI->getFirstTerminator(), - TM.getInstrInfo()->get(X86::FP_REG_KILL)); - ++NumFPKill; - } - } -} - /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 2b67971d480..1e90c0fe8ca 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -188,6 +188,9 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, bool Fast) { if (EnableFastISel) PM.add(createDeadMachineInstructionElimPass()); + // Install a pass to insert x87 FP_REG_KILL instructions, as needed. + PM.add(createX87FPRegKillInserterPass()); + return false; } -- 2.34.1