From f2db9b88dad3fe129b664471f9f79f4038956c4f Mon Sep 17 00:00:00 2001 From: Duraid Madina Date: Fri, 28 Oct 2005 17:46:35 +0000 Subject: [PATCH] DAG->DAG instruction selection for ia64! "hello world" works, not much else. use -enable-ia64-dag-isel to turn this on TODO: delete lowering stuff from the pattern isel : get operations on predicate bits working : get other bits of pseudocode going : use sampo's mulh/mull-using divide-by-constant magic : *so* many patterns ("extr", "tbit" and "dep" will be fun :) : add FP : add a JIT! : get it working 100% in short: this'll be happier in a couple of weeks, but it's here now so the tester can make me feel guilty sooner. OTHER: there are a couple of fixes to the pattern isel, in particular making the linker happy with big blobs of fun like pypy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24058 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/IA64/IA64.h | 5 + lib/Target/IA64/IA64AsmPrinter.cpp | 22 +- lib/Target/IA64/IA64ISelDAGToDAG.cpp | 497 ++++++++++++++++++++++++++ lib/Target/IA64/IA64ISelLowering.cpp | 369 +++++++++++++++++++ lib/Target/IA64/IA64ISelLowering.h | 88 +++++ lib/Target/IA64/IA64ISelPattern.cpp | 11 +- lib/Target/IA64/IA64InstrFormats.td | 8 + lib/Target/IA64/IA64InstrInfo.td | 245 +++++++++++-- lib/Target/IA64/IA64RegisterInfo.td | 8 +- lib/Target/IA64/IA64TargetMachine.cpp | 11 +- lib/Target/IA64/Makefile | 3 +- 11 files changed, 1226 insertions(+), 41 deletions(-) create mode 100644 lib/Target/IA64/IA64ISelDAGToDAG.cpp create mode 100644 lib/Target/IA64/IA64ISelLowering.cpp create mode 100644 lib/Target/IA64/IA64ISelLowering.h diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h index 4eda1903c53..8fe7d9c1d62 100644 --- a/lib/Target/IA64/IA64.h +++ b/lib/Target/IA64/IA64.h @@ -22,6 +22,11 @@ class TargetMachine; class FunctionPass; class IntrinsicLowering; +/// createIA64DAGToDAGInstructionSelector - This pass converts an LLVM +/// function into IA64 machine code in a sane, 
DAG->DAG transform. +/// +FunctionPass *createIA64DAGToDAGInstructionSelector(TargetMachine &TM); + /// createIA64PatternInstructionSelector - This pass converts an LLVM function /// into a machine code representation in a more aggressive way. /// diff --git a/lib/Target/IA64/IA64AsmPrinter.cpp b/lib/Target/IA64/IA64AsmPrinter.cpp index 395fc89b8c2..d96a10a08ff 100644 --- a/lib/Target/IA64/IA64AsmPrinter.cpp +++ b/lib/Target/IA64/IA64AsmPrinter.cpp @@ -11,7 +11,7 @@ // of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas' // assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this // output, but if so that's a bug I'd like to hear about: please file a bug -// report in bugzilla. FYI, the excellent 'ias' assembler is bundled with +// report in bugzilla. FYI, the not too bad 'ias' assembler is bundled with // the Intel C/C++ compiler for Itanium Linux. // //===----------------------------------------------------------------------===// @@ -249,7 +249,25 @@ namespace { } void printS64ImmOperand(const MachineInstr *MI, unsigned OpNo, MVT::ValueType VT) { - O << (int64_t)MI->getOperand(OpNo).getImmedValue(); +// XXX : nasty hack to avoid GPREL22 "relocation truncated to fit" linker +// errors - instead of add rX = @gprel(CPI), r1;; we now +// emit movl rX = @gprel(CPIgetOperand(OpNo).isImmediate()) { + O << (int64_t)MI->getOperand(OpNo).getImmedValue(); + } else { // this is a constant pool reference: FIXME: assert this + printOp(MI->getOperand(OpNo)); + } + } + + void printGlobalOperand(const MachineInstr *MI, unsigned OpNo, + MVT::ValueType VT) { + printOp(MI->getOperand(OpNo), false); // this is NOT a br.call instruction } void printCallOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp new file mode 100644 index 00000000000..791004cc5f7 --- /dev/null +++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp @@ -0,0 +1,497 @@ +//===---- IA64ISelDAGToDAG.cpp - 
IA64 pattern matching inst selector ------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for IA64, +// converting a legalized dag to an IA64 dag. +// +//===----------------------------------------------------------------------===// + +#include "IA64.h" +#include "IA64TargetMachine.h" +#include "IA64ISelLowering.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +#include "llvm/GlobalValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace { + Statistic<> FusedFP ("ia64-codegen", "Number of fused fp operations"); + Statistic<> FrameOff("ia64-codegen", "Number of frame idx offsets collapsed"); + + //===--------------------------------------------------------------------===// + /// IA64DAGToDAGISel - IA64 specific code to select IA64 machine + /// instructions for SelectionDAG operations. + /// + class IA64DAGToDAGISel : public SelectionDAGISel { + IA64TargetLowering IA64Lowering; + unsigned GlobalBaseReg; + public: + IA64DAGToDAGISel(TargetMachine &TM) + : SelectionDAGISel(IA64Lowering), IA64Lowering(TM) {} + + virtual bool runOnFunction(Function &Fn) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + return SelectionDAGISel::runOnFunction(Fn); + } + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. 
+ inline SDOperand getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC + /// base register. Return the virtual register that holds this value. + // SDOperand getGlobalBaseReg(); TODO: hmm + + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. + SDOperand Select(SDOperand Op); + + SDNode *SelectIntImmediateExpr(SDOperand LHS, SDOperand RHS, + unsigned OCHi, unsigned OCLo, + bool IsArithmetic = false, + bool Negate = false); + SDNode *SelectBitfieldInsert(SDNode *N); + + /// SelectCC - Select a comparison of the specified values with the + /// specified condition code, returning the CR# of the expression. + SDOperand SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC); + + /// SelectAddr - Given the specified address, return the two operands for a + /// load/store instruction, and return true if it should be an indexed [r+r] + /// operation. + bool SelectAddr(SDOperand Addr, SDOperand &Op1, SDOperand &Op2); + + SDOperand BuildSDIVSequence(SDNode *N); + SDOperand BuildUDIVSequence(SDNode *N); + + /// InstructionSelectBasicBlock - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); + + virtual const char *getPassName() const { + return "IA64 (Itanium) DAG->DAG Instruction Selector"; + } + +// Include the pieces autogenerated from the target description. +#include "IA64GenDAGISel.inc" + +private: + SDOperand SelectCALL(SDOperand Op); + }; +} + +/// InstructionSelectBasicBlock - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
+void IA64DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { + DEBUG(BB->dump()); + + // The selection process is inherently a bottom-up recursive process (users + // select their uses before themselves). Given infinite stack space, we + // could just start selecting on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, select the entry node, then all its uses, + // iteratively instead of recursively. + std::vector Worklist; + Worklist.push_back(DAG.getEntryNode()); + + // Note that we can do this in the IA64 target (scanning forward across token + // chain edges) because no nodes ever get folded across these edges. On a + // target like X86 which supports load/modify/store operations, this would + // have to be more careful. + while (!Worklist.empty()) { + SDOperand Node = Worklist.back(); + Worklist.pop_back(); + + // Chose from the least deep of the top two nodes. + if (!Worklist.empty() && + Worklist.back().Val->getNodeDepth() < Node.Val->getNodeDepth()) + std::swap(Worklist.back(), Node); + + if ((Node.Val->getOpcode() >= ISD::BUILTIN_OP_END && + Node.Val->getOpcode() < IA64ISD::FIRST_NUMBER) || + CodeGenMap.count(Node)) continue; + + for (SDNode::use_iterator UI = Node.Val->use_begin(), + E = Node.Val->use_end(); UI != E; ++UI) { + // Scan the values. If this use has a value that is a token chain, add it + // to the worklist. + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumValues(); i != e; ++i) + if (User->getValueType(i) == MVT::Other) { + Worklist.push_back(SDOperand(User, i)); + break; + } + } + + // Finally, legalize this node. + Select(Node); + } + + // Select target instructions for the DAG. + DAG.setRoot(Select(DAG.getRoot())); + CodeGenMap.clear(); + DAG.RemoveDeadNodes(); + + // Emit machine code to BB. 
+ ScheduleAndEmitDAG(DAG); +} + + +SDOperand IA64DAGToDAGISel::SelectCALL(SDOperand Op) { + SDNode *N = Op.Val; + SDOperand Chain = Select(N->getOperand(0)); + + unsigned CallOpcode; + std::vector CallOperands; + + // save the current GP, SP and RP : FIXME: do we need to do all 3 always? + SDOperand GPBeforeCall = CurDAG->getCopyFromReg(Chain, IA64::r1, MVT::i64); + Chain = GPBeforeCall.getValue(1); + SDOperand SPBeforeCall = CurDAG->getCopyFromReg(Chain, IA64::r12, MVT::i64); + Chain = SPBeforeCall.getValue(1); + SDOperand RPBeforeCall = CurDAG->getCopyFromReg(Chain, IA64::rp, MVT::i64); + Chain = RPBeforeCall.getValue(1); + + // if we can call directly, do so + if (GlobalAddressSDNode *GASD = + dyn_cast(N->getOperand(1))) { + CallOpcode = IA64::BRCALL_IPREL; + CallOperands.push_back(CurDAG->getTargetGlobalAddress(GASD->getGlobal(), + MVT::i64)); + } else if (ExternalSymbolSDNode *ESSDN = // FIXME: we currently NEED this + // case for correctness, to avoid + // "non-pic code with imm reloc.n + // against dynamic symbol" errors + dyn_cast(N->getOperand(1))) { + CallOpcode = IA64::BRCALL_IPREL; + CallOperands.push_back(N->getOperand(1)); + } else { + // otherwise we need to load the function descriptor, + // load the branch target (function)'s entry point and GP, + // branch (call) then restore the + // GP + + SDOperand FnDescriptor = Select(N->getOperand(1)); + + // load the branch target's entry point [mem] and + // GP value [mem+8] + SDOperand targetEntryPoint=CurDAG->getLoad(MVT::i64, Chain, FnDescriptor, + CurDAG->getSrcValue(0)); + SDOperand targetGPAddr=CurDAG->getNode(ISD::ADD, MVT::i64, FnDescriptor, + CurDAG->getConstant(8, MVT::i64)); + SDOperand targetGP=CurDAG->getLoad(MVT::i64, Chain, targetGPAddr, + CurDAG->getSrcValue(0)); + + // Copy the callee address into the b6 branch register + SDOperand B6 = CurDAG->getRegister(IA64::B6, MVT::i64); + Chain = CurDAG->getNode(ISD::CopyToReg, MVT::Other, Chain, B6, + targetEntryPoint); + + 
CallOperands.push_back(B6); + CallOpcode = IA64::BRCALL_INDIRECT; + } + + // TODO: support in-memory arguments + unsigned used_FPArgs=0; // how many FP args have been used so far? + + unsigned intArgs[] = {IA64::out0, IA64::out1, IA64::out2, IA64::out3, + IA64::out4, IA64::out5, IA64::out6, IA64::out7 }; + unsigned FPArgs[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, + IA64::F12, IA64::F13, IA64::F14, IA64::F15 }; + + SDOperand InFlag; // Null incoming flag value. + + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { + unsigned DestReg = 0; + MVT::ValueType RegTy = N->getOperand(i).getValueType(); + if (RegTy == MVT::i64) { + assert((i-2) < 8 && "Too many int args"); + DestReg = intArgs[i-2]; + } else { + assert(MVT::isFloatingPoint(N->getOperand(i).getValueType()) && + "Unpromoted integer arg?"); + assert(used_FPArgs < 8 && "Too many fp args"); + DestReg = FPArgs[used_FPArgs++]; + } + + if (N->getOperand(i).getOpcode() != ISD::UNDEF) { + SDOperand Val = Select(N->getOperand(i)); + Chain = CurDAG->getCopyToReg(Chain, DestReg, Val, InFlag); + InFlag = Chain.getValue(1); + CallOperands.push_back(CurDAG->getRegister(DestReg, RegTy)); + } + } + + // Finally, once everything is in registers to pass to the call, emit the + // call itself. + if (InFlag.Val) + CallOperands.push_back(InFlag); // Strong dep on register copies. + else + CallOperands.push_back(Chain); // Weak dep on whatever occurs before + Chain = CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag, + CallOperands); + +// return Chain; // HACK: err, this means that functions never return anything. need to intergrate this with the code immediately below FIXME XXX + + std::vector CallResults; + + // If the call has results, copy the values out of the ret val registers. 
+ switch (N->getValueType(0)) { + default: assert(0 && "Unexpected ret value!"); + case MVT::Other: break; + case MVT::i64: + Chain = CurDAG->getCopyFromReg(Chain, IA64::r8, MVT::i64, + Chain.getValue(1)).getValue(1); + CallResults.push_back(Chain.getValue(0)); + break; + case MVT::f64: + Chain = CurDAG->getCopyFromReg(Chain, IA64::F8, N->getValueType(0), + Chain.getValue(1)).getValue(1); + CallResults.push_back(Chain.getValue(0)); + break; + } + // restore GP, SP and RP + Chain = CurDAG->getCopyToReg(Chain, IA64::r1, GPBeforeCall); + Chain = CurDAG->getCopyToReg(Chain, IA64::r12, SPBeforeCall); + Chain = CurDAG->getCopyToReg(Chain, IA64::rp, RPBeforeCall); + + CallResults.push_back(Chain); + + for (unsigned i = 0, e = CallResults.size(); i != e; ++i) + CodeGenMap[Op.getValue(i)] = CallResults[i]; + + return CallResults[Op.ResNo]; +} + +// Select - Convert the specified operand from a target-independent to a +// target-specific node if it hasn't already been changed. +SDOperand IA64DAGToDAGISel::Select(SDOperand Op) { + SDNode *N = Op.Val; + if (N->getOpcode() >= ISD::BUILTIN_OP_END && + N->getOpcode() < IA64ISD::FIRST_NUMBER) + return Op; // Already selected. + + // If this has already been converted, use it. 
+ std::map::iterator CGMI = CodeGenMap.find(Op); + if (CGMI != CodeGenMap.end()) return CGMI->second; + + switch (N->getOpcode()) { + default: break; + + case ISD::CALL: + case ISD::TAILCALL: return SelectCALL(Op); + +/* todo: + * case ISD::DYNAMIC_STACKALLOC: +*/ + + case ISD::FrameIndex: { // TODO: reduce creepyness + int FI = cast(N)->getIndex(); + if (N->hasOneUse()) { + CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i64)); + return SDOperand(N, 0); + } + return CurDAG->getTargetNode(IA64::MOV, MVT::i64, + CurDAG->getTargetFrameIndex(FI, MVT::i64)); + } + + case ISD::TokenFactor: { + SDOperand New; + if (N->getNumOperands() == 2) { + SDOperand Op0 = Select(N->getOperand(0)); + SDOperand Op1 = Select(N->getOperand(1)); + New = CurDAG->getNode(ISD::TokenFactor, MVT::Other, Op0, Op1); + } else { + std::vector Ops; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(Select(N->getOperand(i))); + New = CurDAG->getNode(ISD::TokenFactor, MVT::Other, Ops); + } + + CodeGenMap[Op] = New; + return New; + } + case ISD::CopyFromReg: { + SDOperand Chain = Select(N->getOperand(0)); + if (Chain == N->getOperand(0)) return Op; // No change + SDOperand New = CurDAG->getCopyFromReg(Chain, + cast(N->getOperand(1))->getReg(), N->getValueType(0)); + return New.getValue(Op.ResNo); + } + case ISD::CopyToReg: { + SDOperand Chain = Select(N->getOperand(0)); + SDOperand Reg = N->getOperand(1); + SDOperand Val = Select(N->getOperand(2)); + SDOperand New = CurDAG->getNode(ISD::CopyToReg, MVT::Other, + Chain, Reg, Val); + CodeGenMap[Op] = New; + return New; + } + + case ISD::GlobalAddress: { + GlobalValue *GV = cast(N)->getGlobal(); + SDOperand GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64); + SDOperand Tmp = CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, + CurDAG->getRegister(IA64::r1, MVT::i64), GA); + return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp); + } + + case ISD::LOAD: + case ISD::EXTLOAD: + case ISD::ZEXTLOAD: { 
+ SDOperand Chain = Select(N->getOperand(0)); + SDOperand Address = Select(N->getOperand(1)); + + MVT::ValueType TypeBeingLoaded = (N->getOpcode() == ISD::LOAD) ? + N->getValueType(0) : cast(N->getOperand(3))->getVT(); + unsigned Opc; + switch (TypeBeingLoaded) { + default: N->dump(); assert(0 && "Cannot load this type!"); + // FIXME: bools? case MVT::i1: + case MVT::i8: Opc = IA64::LD1; break; + case MVT::i16: Opc = IA64::LD2; break; + case MVT::i32: Opc = IA64::LD4; break; + case MVT::i64: Opc = IA64::LD8; break; + + case MVT::f32: Opc = IA64::LDF4; break; + case MVT::f64: Opc = IA64::LDF8; break; + } + + CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other, + Address, Chain); // TODO: comment this + + return SDOperand(N, Op.ResNo); + } + + case ISD::TRUNCSTORE: + case ISD::STORE: { + SDOperand Address = Select(N->getOperand(2)); + + unsigned Opc; + if (N->getOpcode() == ISD::STORE) { + switch (N->getOperand(1).getValueType()) { + default: assert(0 && "unknown Type in store"); + case MVT::i64: Opc = IA64::ST8; break; + case MVT::f64: Opc = IA64::STF8; break; + } + } else { //ISD::TRUNCSTORE + switch(cast(N->getOperand(4))->getVT()) { + default: assert(0 && "unknown Type in store"); + case MVT::i8: Opc = IA64::ST1; break; + case MVT::i16: Opc = IA64::ST2; break; + case MVT::i32: Opc = IA64::ST4; break; + case MVT::f32: Opc = IA64::STF4; break; + } + } + + CurDAG->SelectNodeTo(N, Opc, MVT::Other, Select(N->getOperand(2)), + Select(N->getOperand(1)), Select(N->getOperand(0))); + return SDOperand(N, 0); + } + + case ISD::BRCOND: { + SDOperand Chain = Select(N->getOperand(0)); + SDOperand CC = Select(N->getOperand(1)); + MachineBasicBlock *Dest = + cast(N->getOperand(2))->getBasicBlock(); + //FIXME - we do NOT need long branches all the time + CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC, CurDAG->getBasicBlock(Dest), Chain); + return SDOperand(N, 0); + } + + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: { + int64_t Amt = 
cast(N->getOperand(1))->getValue(); + unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ? + IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP; + CurDAG->SelectNodeTo(N, Opc, MVT::Other, + getI64Imm(Amt), Select(N->getOperand(0))); + return SDOperand(N, 0); + } + + case ISD::RET: { + SDOperand Chain = Select(N->getOperand(0)); // Token chain. + + switch (N->getNumOperands()) { + default: + assert(0 && "Unknown return instruction!"); + case 2: { + SDOperand RetVal = Select(N->getOperand(1)); + switch (RetVal.getValueType()) { + default: assert(0 && "I don't know how to return this type! (promote?)"); + // FIXME: do I need to add support for bools here? + // (return '0' or '1' in r8, basically...) + // + // FIXME: need to round floats - 80 bits is bad, the tester + // told me so + case MVT::i64: + // we mark r8 as live on exit up above in LowerArguments() + // BuildMI(BB, IA64::MOV, 1, IA64::r8).addReg(Tmp1); + Chain = CurDAG->getCopyToReg(Chain, IA64::r8, RetVal); + break; + case MVT::f64: + // we mark F8 as live on exit up above in LowerArguments() + // BuildMI(BB, IA64::FMOV, 1, IA64::F8).addReg(Tmp1); + Chain = CurDAG->getCopyToReg(Chain, IA64::F8, RetVal); + break; + } + break; + } + case 1: + break; + } + + // we need to copy VirtGPR (the vreg (to become a real reg)) that holds + // the output of this function's alloc instruction back into ar.pfs + // before we return. this copy must not float up above the last + // outgoing call in this function!!! 
+ SDOperand AR_PFSVal = CurDAG->getCopyFromReg(Chain, IA64Lowering.VirtGPR, + MVT::i64); + Chain = AR_PFSVal.getValue(1); + Chain = CurDAG->getCopyToReg(Chain, IA64::AR_PFS, AR_PFSVal); + + CurDAG->SelectNodeTo(N, IA64::RET, MVT::Other, Chain); // and then just emit a 'ret' instruction + + // before returning, restore the ar.pfs register (set by the 'alloc' up top) + // BuildMI(BB, IA64::MOV, 1).addReg(IA64::AR_PFS).addReg(IA64Lowering.VirtGPR); + // + return SDOperand(N, 0); + } + + case ISD::BR: + // FIXME: we don't need long branches all the time! + CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other, N->getOperand(1), + Select(N->getOperand(0))); + return SDOperand(N, 0); + + } + + return SelectCode(Op); +} + + +/// createIA64DAGToDAGInstructionSelector - This pass converts a legalized DAG +/// into an IA64-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createIA64DAGToDAGInstructionSelector(TargetMachine &TM) { + return new IA64DAGToDAGISel(TM); +} + diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp new file mode 100644 index 00000000000..2ffa24e78c0 --- /dev/null +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -0,0 +1,369 @@ +//===-- IA64ISelLowering.cpp - IA64 DAG Lowering Implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IA64ISelLowering class. 
+// +//===----------------------------------------------------------------------===// + +#include "IA64ISelLowering.h" +#include "IA64MachineFunctionInfo.h" +#include "IA64TargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +using namespace llvm; + +IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) + : TargetLowering(TM) { + + // register class for general registers + addRegisterClass(MVT::i64, IA64::GRRegisterClass); + + // register class for FP registers + addRegisterClass(MVT::f64, IA64::FPRegisterClass); + + // register class for predicate registers + addRegisterClass(MVT::i1, IA64::PRRegisterClass); + + setOperationAction(ISD::BRCONDTWOWAY , MVT::Other, Expand); + setOperationAction(ISD::BRTWOWAY_CC , MVT::Other, Expand); + setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); + + setSetCCResultType(MVT::i1); + setShiftAmountType(MVT::i64); + + setOperationAction(ISD::EXTLOAD , MVT::i1 , Promote); + + setOperationAction(ISD::ZEXTLOAD , MVT::i1 , Expand); + + setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); + setOperationAction(ISD::SEXTLOAD , MVT::i8 , Expand); + setOperationAction(ISD::SEXTLOAD , MVT::i16 , Expand); + setOperationAction(ISD::SEXTLOAD , MVT::i32 , Expand); + + setOperationAction(ISD::FREM , MVT::f32 , Expand); + setOperationAction(ISD::FREM , MVT::f64 , Expand); + + setOperationAction(ISD::UREM , MVT::f32 , Expand); + setOperationAction(ISD::UREM , MVT::f64 , Expand); + + setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); + setOperationAction(ISD::MEMSET , MVT::Other, Expand); + setOperationAction(ISD::MEMCPY , MVT::Other, Expand); + + setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); + setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); + + // We don't support sin/cos/sqrt + 
setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + + //IA64 has these, but they are not implemented + setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CTLZ , MVT::i64 , Expand); + + computeRegisterProperties(); + + addLegalFPImmediate(+0.0); + addLegalFPImmediate(+1.0); + addLegalFPImmediate(-0.0); + addLegalFPImmediate(-1.0); + +} + +/// isFloatingPointZero - Return true if this is 0.0 or -0.0. +static bool isFloatingPointZero(SDOperand Op) { + if (ConstantFPSDNode *CFP = dyn_cast(Op)) + return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0); + else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) { + // Maybe this has already been legalized into the constant pool? + if (ConstantPoolSDNode *CP = dyn_cast(Op.getOperand(1))) + if (ConstantFP *CFP = dyn_cast(CP->get())) + return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0); + } + return false; +} + +std::vector +IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { + std::vector ArgValues; + // + // add beautiful description of IA64 stack frame format + // here (from intel 24535803.pdf most likely) + // + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + GP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + SP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + RP = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + + MachineBasicBlock& BB = MF.front(); + + unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35, + IA64::r36, IA64::r37, IA64::r38, IA64::r39}; + + unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, + IA64::F12,IA64::F13,IA64::F14, IA64::F15}; + 
+ unsigned argVreg[8]; + unsigned argPreg[8]; + unsigned argOpc[8]; + + unsigned used_FPArgs = 0; // how many FP args have been used so far? + + unsigned ArgOffset = 0; + int count = 0; + + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + { + SDOperand newroot, argt; + if(count < 8) { // need to fix this logic? maybe. + + switch (getValueType(I->getType())) { + default: + assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n"); + case MVT::f32: + // fixme? (well, will need to for weird FP structy stuff, + // see intel ABI docs) + case MVT::f64: +//XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]); + MF.addLiveIn(args_FP[used_FPArgs]); // mark this reg as liveIn + // floating point args go into f8..f15 as-needed, the increment + argVreg[count] = // is below..: + MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::f64)); + // FP args go into f8..f15 as needed: (hence the ++) + argPreg[count] = args_FP[used_FPArgs++]; + argOpc[count] = IA64::FMOV; + argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], + MVT::f64); + if (I->getType() == Type::FloatTy) + argt = DAG.getNode(ISD::FP_ROUND, MVT::f32, argt); + break; + case MVT::i1: // NOTE: as far as C abi stuff goes, + // bools are just boring old ints + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: +//XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]); + MF.addLiveIn(args_int[count]); // mark this register as liveIn + argVreg[count] = + MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + argPreg[count] = args_int[count]; + argOpc[count] = IA64::MOV; + argt = newroot = + DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::i64); + if ( getValueType(I->getType()) != MVT::i64) + argt = DAG.getNode(ISD::TRUNCATE, getValueType(I->getType()), + newroot); + break; + } + } else { // more than 8 args go into the frame + // Create the frame index object for this incoming parameter... 
+ ArgOffset = 16 + 8 * (count - 8); + int FI = MFI->CreateFixedObject(8, ArgOffset); + + // Create the SelectionDAG nodes corresponding to a load + //from this parameter + SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64); + argt = newroot = DAG.getLoad(getValueType(I->getType()), + DAG.getEntryNode(), FIN, DAG.getSrcValue(NULL)); + } + ++count; + DAG.setRoot(newroot.getValue(1)); + ArgValues.push_back(argt); + } + + + // Create a vreg to hold the output of (what will become) + // the "alloc" instruction + VirtGPR = MF.getSSARegMap()->createVirtualRegister(getRegClassFor(MVT::i64)); + BuildMI(&BB, IA64::PSEUDO_ALLOC, 0, VirtGPR); + // we create a PSEUDO_ALLOC (pseudo)instruction for now + + BuildMI(&BB, IA64::IDEF, 0, IA64::r1); + + // hmm: + BuildMI(&BB, IA64::IDEF, 0, IA64::r12); + BuildMI(&BB, IA64::IDEF, 0, IA64::rp); + // ..hmm. + + BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1); + + // hmm: + BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12); + BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp); + // ..hmm. + + unsigned tempOffset=0; + + // if this is a varargs function, we simply lower llvm.va_start by + // pointing to the first entry + if(F.isVarArg()) { + tempOffset=0; + VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset); + } + + // here we actually do the moving of args, and store them to the stack + // too if this is a varargs function: + for (int i = 0; i < count && i < 8; ++i) { + BuildMI(&BB, argOpc[i], 1, argVreg[i]).addReg(argPreg[i]); + if(F.isVarArg()) { + // if this is a varargs function, we copy the input registers to the stack + int FI = MFI->CreateFixedObject(8, tempOffset); + tempOffset+=8; //XXX: is it safe to use r22 like this? + BuildMI(&BB, IA64::MOV, 1, IA64::r22).addFrameIndex(FI); + // FIXME: we should use st8.spill here, one day + BuildMI(&BB, IA64::ST8, 1, IA64::r22).addReg(argPreg[i]); + } + } + + // Finally, inform the code generator which regs we return values in. 
+  // (see the ISD::RET: case in the instruction selector)
+  switch (getValueType(F.getReturnType())) {
+  default: assert(0 && "i have no idea where to return this type!");
+  case MVT::isVoid: break;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+    MF.addLiveOut(IA64::r8);
+    break;
+  case MVT::f32:
+  case MVT::f64:
+    MF.addLiveOut(IA64::F8);
+    break;
+  }
+
+  return ArgValues;
+}
+
+std::pair<SDOperand, SDOperand>
+IA64TargetLowering::LowerCallTo(SDOperand Chain,
+                                const Type *RetTy, bool isVarArg,
+                                unsigned CallingConv, bool isTailCall,
+                                SDOperand Callee, ArgListTy &Args,
+                                SelectionDAG &DAG) {
+  // Wraps the call in CALLSEQ_START/CALLSEQ_END; returns (call node, chain).
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  unsigned NumBytes = 16;    // grows by 8 for each argument past the eighth
+  unsigned outRegsUsed = 0;  // how many outX registers this call needs
+
+  if (Args.size() > 8) {
+    NumBytes += (Args.size() - 8) * 8;
+    outRegsUsed = 8;
+  } else {
+    outRegsUsed = Args.size();
+  }
+
+  // FIXME? this WILL fail if we ever try to pass around an arg that
+  // consumes more than a single output slot (a 'real' double, int128
+  // some sort of aggregate etc.), as we'll underestimate how many 'outX'
+  // registers we use. Hopefully, the assembler will notice.
+  MF.getInfo<IA64FunctionInfo>()->outRegsUsed=
+    std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);
+
+  Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
+                      DAG.getConstant(NumBytes, getPointerTy()));
+
+  std::vector<SDOperand> args_to_use;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i)
+    {
+      switch (getValueType(Args[i].second)) {
+      default: assert(0 && "unexpected argument type!");
+      case MVT::i1:
+      case MVT::i8:
+      case MVT::i16:
+      case MVT::i32:
+        //promote to 64-bits, sign/zero extending based on type
+        //of the argument
+        if(Args[i].second->isSigned())
+          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i64,
+                                      Args[i].first);
+        else
+          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i64,
+                                      Args[i].first);
+        break;
+      case MVT::f32:
+        //promote to 64-bits, then fall through (on purpose) to the break below
+        Args[i].first = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Args[i].first);
+      case MVT::f64:
+      case MVT::i64:
+        break;
+      }
+      args_to_use.push_back(Args[i].first);
+    }
+
+  std::vector<MVT::ValueType> RetVals;
+  MVT::ValueType RetTyVT = getValueType(RetTy);
+  if (RetTyVT != MVT::isVoid)
+    RetVals.push_back(RetTyVT);
+  RetVals.push_back(MVT::Other);
+
+  SDOperand TheCall = SDOperand(DAG.getCall(RetVals, Chain,
+                                            Callee, args_to_use), 0);
+  Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
+  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+                      DAG.getConstant(NumBytes, getPointerTy()));
+  return std::make_pair(TheCall, Chain);
+}
+
+SDOperand
+IA64TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
+                                 Value *VAListV, SelectionDAG &DAG) {
+  // vastart just stores the address of the VarArgsFrameIndex slot.
+  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
+  return DAG.getNode(ISD::STORE, MVT::Other, Chain, FR,
+                     VAListP, DAG.getSrcValue(VAListV));
+}
+
+std::pair<SDOperand, SDOperand> IA64TargetLowering::
+LowerVAArg(SDOperand Chain, SDOperand VAListP, Value *VAListV,
+           const Type *ArgTy, SelectionDAG &DAG) {
+  // Loads the next argument through the va_list, then bumps the list pointer.
+  MVT::ValueType ArgVT = getValueType(ArgTy);
+  SDOperand Val = DAG.getLoad(MVT::i64, Chain,
+                              VAListP, DAG.getSrcValue(VAListV));
+  SDOperand Result = DAG.getLoad(ArgVT, DAG.getEntryNode(), Val,
+                                 DAG.getSrcValue(NULL));
+
+  // The va_list pointer advances by 8 for every supported argument type;
+  // the type check only guards against arguments that were not promoted.
+  assert((ArgVT == MVT::i32 || ArgVT == MVT::f32 ||
+          ArgVT == MVT::i64 || ArgVT == MVT::f64) &&
+         "Other types should have been promoted for varargs!");
+  const unsigned Amt = 8;
+
+  Val = DAG.getNode(ISD::ADD, Val.getValueType(), Val,
+                    DAG.getConstant(Amt, Val.getValueType()));
+  Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain,
+                      Val, VAListP, DAG.getSrcValue(VAListV));
+  return std::make_pair(Result, Chain);
+}
+
+
+// LowerFrameReturnAddress - Not implemented for IA64 yet: asserts, then
+// aborts so that builds without assertions stop here as well.
+std::pair<SDOperand, SDOperand> IA64TargetLowering::
+LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
+                        SelectionDAG &DAG) {
+  assert(0 && "LowerFrameReturnAddress unimplemented");
+  abort();
+}
+
diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h
new file mode 100644
index 00000000000..5b33348ced1
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.h
@@ -0,0 +1,88 @@
+//===-- IA64ISelLowering.h - IA64 DAG Lowering Interface --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that IA64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64_IA64ISELLOWERING_H
+#define LLVM_TARGET_IA64_IA64ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "IA64.h"
+
+namespace llvm {
+  namespace IA64ISD {
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END+IA64::INSTRUCTION_LIST_END,
+
+      /// FSEL - Traditional three-operand fsel node.
+      /// NOTE(review): fsel/fcfid/fcti*z look like PowerPC mnemonics - confirm.
+      FSEL,
+
+      /// FCFID - The FCFID instruction, taking an f64 operand and producing
+      /// an f64 value containing the FP representation of the integer that
+      /// was temporarily in the f64 operand.
+      FCFID,
+
+      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+      /// operand, producing an f64 value containing the integer representation
+      /// of that FP value.
+      FCTIDZ, FCTIWZ,
+    };
+  }
+
+  class IA64TargetLowering : public TargetLowering {
+    int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
+    //int ReturnAddrIndex;              // FrameIndex for return slot.
+    unsigned GP, SP, RP; // FIXME - clean this mess up
+
+  public:
+    IA64TargetLowering(TargetMachine &TM);
+
+    unsigned VirtGPR; // this is public so it can be accessed in the selector
+                      // for ISD::RET. add an accessor instead? FIXME
+
+    /// LowerOperation - Provide custom lowering hooks for some operations.
+    ///
+// XXX    virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+
+    /// LowerArguments - This hook must be implemented to indicate how we should
+    /// lower the arguments for the specified function, into the specified DAG.
+    virtual std::vector<SDOperand>
+      LowerArguments(Function &F, SelectionDAG &DAG);
+
+    /// LowerCallTo - This hook lowers an abstract call to a function into an
+    /// actual call.
+    virtual std::pair<SDOperand, SDOperand>
+      LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg,
+                  unsigned CC,
+                  bool isTailCall, SDOperand Callee, ArgListTy &Args,
+                  SelectionDAG &DAG);
+
+    virtual SDOperand LowerVAStart(SDOperand Chain, SDOperand VAListP,
+                                   Value *VAListV, SelectionDAG &DAG);
+
+    virtual std::pair<SDOperand, SDOperand>
+      LowerVAArg(SDOperand Chain, SDOperand VAListP, Value *VAListV,
+                 const Type *ArgTy, SelectionDAG &DAG);
+
+    virtual std::pair<SDOperand, SDOperand>
+      LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
+                              SelectionDAG &DAG);
+
+// XXX    virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+// XXX                                                      MachineBasicBlock *MBB);
+  };
+}
+
+#endif   // LLVM_TARGET_IA64_IA64ISELLOWERING_H
diff --git a/lib/Target/IA64/IA64ISelPattern.cpp b/lib/Target/IA64/IA64ISelPattern.cpp
index b6a0c159060..d2351d0fbd7 100644
--- a/lib/Target/IA64/IA64ISelPattern.cpp
+++ b/lib/Target/IA64/IA64ISelPattern.cpp
@@ -1466,7 +1466,7 @@ pC = pA OR pB
 */
       BuildMI(BB, IA64::PCMPEQUNC, 3, pTemp1)
         .addReg(IA64::r0).addReg(IA64::r0).addReg(pA);
-      BuildMI(BB, IA64::TPCMPEQ, 3, Result)
+      BuildMI(BB, IA64::TPCMPEQ, 4, Result)
         .addReg(pTemp1).addReg(IA64::r0).addReg(IA64::r0).addReg(pB);
       break;
     }
@@ -1957,8 +1957,13 @@ pC = pA OR pB
     Select(Chain);
     IA64Lowering.restoreGP(BB);
     unsigned dummy = MakeReg(MVT::i64);
-    BuildMI(BB, IA64::ADD, 2, dummy).addConstantPoolIndex(CPIdx)
-      .addReg(IA64::r1); // CPI+GP
+    unsigned dummy2 = MakeReg(MVT::i64);
+    BuildMI(BB, IA64::MOVLIMM64, 1, dummy2).addConstantPoolIndex(CPIdx);
+    BuildMI(BB, IA64::ADD, 2, dummy).addReg(dummy2).addReg(IA64::r1); //CPI+GP
+
+
+    // OLD BuildMI(BB, IA64::ADD, 2, dummy).addConstantPoolIndex(CPIdx)
+    // (FIXME!)
.addReg(IA64::r1); // CPI+GP
     if(!isBool)
       BuildMI(BB, Opc, 1, Result).addReg(dummy);
     else { // emit a little pseudocode to load a bool (stored in one byte)
diff --git a/lib/Target/IA64/IA64InstrFormats.td b/lib/Target/IA64/IA64InstrFormats.td
index 9d07acac724..44ffe16ec73 100644
--- a/lib/Target/IA64/IA64InstrFormats.td
+++ b/lib/Target/IA64/IA64InstrFormats.td
@@ -36,6 +36,14 @@ class AForm opcode, bits<6> qpReg, dag OL, string asmstr> :
   let Inst{5-0} = qpReg;
 }
 
+class AForm_DAG opcode, bits<6> qpReg, dag OL, string asmstr,
+      list pattern> :
+  InstIA64 {
+
+  let Pattern = pattern;
+  let Inst{5-0} = qpReg;
+}
+
 let isBranch = 1, isTerminator = 1 in
 class BForm opcode, bits<6> x6, bits<3> btype, dag OL, string asmstr> :
   InstIA64 {
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
index 52d817a5eb3..9aa48290d90 100644
--- a/lib/Target/IA64/IA64InstrInfo.td
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -19,7 +19,7 @@ def u6imm : Operand;
 def s8imm : Operand {
   let PrintMethod = "printS8ImmOperand";
 }
-def s14imm : Operand {
+def s14imm : Operand {
   let PrintMethod = "printS14ImmOperand";
 }
 def s22imm : Operand {
@@ -32,10 +32,212 @@ def s64imm : Operand {
   let PrintMethod = "printS64ImmOperand";
 }
 
+let PrintMethod = "printGlobalOperand" in
+  def globaladdress : Operand;
+
 // the asmprinter needs to know about calls
 let PrintMethod = "printCallOperand" in
   def calltarget : Operand;
 
+/* new daggy action!!! */
+
+def immSExt14  : PatLeaf<(i64 imm), [{
+  // immSExt14 predicate - True if the immediate fits in a 14-bit sign extended
+  // field.  Used by instructions like 'adds'.
+  int64_t v = (int64_t)N->getValue();
+  return (v <= 8191 && v >= -8192);
+}]>;
+
+def imm64  : PatLeaf<(i64 imm), [{
+  // imm64 predicate - True if the immediate fits in a 64-bit
+  // field - i.e., true. used to keep movl happy
+  return true;
+}]>;
+
+def ADD  : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "add $dst = $src1, $src2;;",
+           [(set GR:$dst, (add GR:$src1, GR:$src2))]>;
+
+def ADD1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "add $dst = $src1, $src2, 1;;",
+           [(set GR:$dst, (add (add GR:$src1, GR:$src2), 1))]>;
+
+def ADDS : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+           "adds $dst = $imm, $src1;;",
+           [(set GR:$dst, (add GR:$src1, immSExt14:$imm))]>;
+
+def MOVL : AForm_DAG<0x03, 0x0b, (ops GR:$dst, s64imm:$imm),
+           "movl $dst = $imm;;",
+           [(set GR:$dst, imm64:$imm)]>;
+
+def ADDL_GA : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, globaladdress:$imm),
+           "addl $dst = $imm, $src1;;",
+           []>;
+
+def SUB  : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "sub $dst = $src1, $src2;;",
+           [(set GR:$dst, (sub GR:$src1, GR:$src2))]>;
+
+def SUB1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+           "sub $dst = $src1, $src2, 1;;",
+           [(set GR:$dst, (add (sub GR:$src1, GR:$src2), -1))]>;
+
+def GETFSIGD : AForm_DAG<0x03, 0x0b, (ops GR:$dst, FP:$src),
+  "getf.sig $dst = $src;;",
+  []>;
+
+def SETFSIGD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, GR:$src),
+  "setf.sig $dst = $src;;",
+  []>;
+
+def XMALD : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3),
+  "xma.l $dst = $src1, $src2, $src3;;",
+  []>;
+
+// pseudocode for integer multiplication
+def : Pat<(mul GR:$src1, GR:$src2),
+           (GETFSIGD (XMALD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+
+// TODO: addp4 (addp4 dst = src, r0 is a 32-bit add)
+// has imm form, too
+
+// def ADDS : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm),
+//   "adds $dst = $imm, $src1;;">;
+
+// load constants of various sizes // FIXME: prettyprint -ve constants
+def : Pat<(i64 immSExt14:$imm), (ADDS r0, immSExt14:$imm)>;
+def : Pat<(i64 imm64:$imm), (MOVL imm64:$imm)>;
+
+def AND : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "and $dst = $src1, $src2;;",
+          [(set GR:$dst, (and GR:$src1, GR:$src2))]>;
+def ANDCM : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "andcm $dst = $src1, $src2;;",
+          [(set GR:$dst, (and GR:$src1, (not GR:$src2)))]>;
+// TODO: and/andcm/or/xor/add/sub/shift immediate forms
+def OR : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "or $dst = $src1, $src2;;",
+          [(set GR:$dst, (or GR:$src1, GR:$src2))]>;
+
+def pOR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2, PR:$qp),
+          "($qp) or $dst = $src1, $src2;;">;
+
+def PCMPEQUNCR0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$qp),
+          "($qp) cmp.eq.unc $dst, p0 = r0, r0;;">;
+
+let isTwoAddress=1 in
+def TPCMPEQR0R0 : AForm<0x03, 0x0b, (ops PR:$dst, PR:$bogus, PR:$qp),
+          "($qp) cmp.eq $dst, p0 = r0, r0;;">;
+
+/* our pseudocode for OR on predicates is:
+ *
+
+pC = pA OR pB
+-------------
+
+(pA) cmp.eq.unc pC,p0 = r0,r0  // pC = pA
+ ;;
+(pB) cmp.eq pC,p0 = r0,r0 // if (pB) pC = 1
+
+*/
+/*
+let isTwoAddress = 1 in {
+  def TPCMPEQ : AForm<0x03, 0x0b,
+  (ops PR:$dst, PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+    "($qp) cmp.eq $dst, p0 = $src3, $src4;;">;
+}
+*/
+
+// FIXME: these are bogus
+def bOR : Pat<(or PR:$src1, PR:$src2),
+          (PCMPEQUNCR0R0 PR:$src1)>;
+
+def bXOR : Pat<(xor PR:$src1, PR:$src2),
+          (PCMPEQUNCR0R0 PR:$src1)>;
+
+def XOR : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "xor $dst = $src1, $src2;;",
+          [(set GR:$dst, (xor GR:$src1, GR:$src2))]>;
+
+def SHL : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
+          "shl $dst = $src1, $src2;;",
+          [(set GR:$dst, (shl GR:$src1, GR:$src2))]>;
+
+/*
+def CMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.eq $dst, p0 = $src1, $src2;;">;
+def CMPGT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.gt $dst, p0 = $src1, $src2;;">;
+def CMPGE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.ge $dst, p0 = $src1, $src2;;">;
+def CMPLT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.lt $dst, p0 = $src1,
$src2;;">;
+def CMPLE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.le $dst, p0 = $src1, $src2;;">;
+def CMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.ne $dst, p0 = $src1, $src2;;">;
+def CMPLTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.ltu $dst, p0 = $src1, $src2;;">;
+def CMPGTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.gtu $dst, p0 = $src1, $src2;;">;
+def CMPLEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.leu $dst, p0 = $src1, $src2;;">;
+def CMPGEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+  "cmp.geu $dst, p0 = $src1, $src2;;">;
+*/
+
+// the following are all a bit unfortunate: we throw away the complement
+// of the compare! (each asm string must use the relation its pattern matches)
+def CMPEQ : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.eq $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (seteq GR:$src1, GR:$src2))]>;
+def CMPGT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.gt $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setgt GR:$src1, GR:$src2))]>;
+def CMPGE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.ge $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setge GR:$src1, GR:$src2))]>;
+def CMPLT : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.lt $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setlt GR:$src1, GR:$src2))]>;
+def CMPLE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.le $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setle GR:$src1, GR:$src2))]>;
+def CMPNE : AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.ne $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setne GR:$src1, GR:$src2))]>;
+def CMPLTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.ltu $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setult GR:$src1, GR:$src2))]>;
+def CMPGTU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.gtu $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setugt GR:$src1, GR:$src2))]>;
+def CMPLEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.leu $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setule GR:$src1, GR:$src2))]>;
+def CMPGEU: AForm_DAG<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2),
+          "cmp.geu $dst, p0 = $src1, $src2;;",
+          [(set PR:$dst, (setuge GR:$src1, GR:$src2))]>;
+
+// FIXME: tablegen doesn't know that zxt1 is cheaper on ia64 than "andi",
+// need to fix this one day
+
+def SXT1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;",
+           [(set GR:$dst, (sext_inreg GR:$src, i8))]>;
+def ZXT1 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt1 $dst = $src;;",
+           [(set GR:$dst, (and GR:$src, 255))]>;
+def SXT2 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt2 $dst = $src;;",
+           [(set GR:$dst, (sext_inreg GR:$src, i16))]>;
+def ZXT2 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt2 $dst = $src;;",
+           [(set GR:$dst, (and GR:$src, 65535))]>;
+def SXT4 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt4 $dst = $src;;",
+           [(set GR:$dst, (sext_inreg GR:$src, i32))]>;
+def ZXT4 : AForm_DAG<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt4 $dst = $src;;",
+           [(set GR:$dst, (and GR:$src, 1341835918))]>; // FIXME: placeholder, NOT the zxt4 mask; want 0xFFFFFFFF once tablegen ints are known to hold it (confirm)
+
+// TODO: support postincrement (reg, imm9) loads+stores - this needs more
+// tablegen support
+
 def PHI : PseudoInstIA64<(ops variable_ops), "PHI">;
 def IDEF : PseudoInstIA64<(ops variable_ops), "// IDEF">;
 def IUSE : PseudoInstIA64<(ops variable_ops), "// IUSE">;
@@ -96,6 +298,7 @@ def MOVSIMM22 : AForm<0x03, 0x0b, (ops GR:$dst, s22imm:$imm),
 def MOVLIMM64 : AForm<0x03, 0x0b, (ops GR:$dst, s64imm:$imm),
   "movl $dst = $imm;;">;
 
+/*
 def AND : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
   "and $dst = $src1, $src2;;">;
 def OR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
@@ -104,6 +307,7 @@ def XOR : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
   "xor $dst = $src1, $src2;;">;
 def SHL : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2),
   "shl $dst = $src1, $src2;;">;
+*/
def SHLI : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm), "shl $dst = $src1, $imm;;">; def SHRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), @@ -123,35 +327,14 @@ def EXTRU : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2), def DEPZ : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, u6imm:$imm1, u6imm:$imm2), "dep.z $dst = $src1, $imm1, $imm2;;">; +/* def SXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt1 $dst = $src;;">; def ZXT1 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt1 $dst = $src;;">; def SXT2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt2 $dst = $src;;">; def ZXT2 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt2 $dst = $src;;">; def SXT4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "sxt4 $dst = $src;;">; def ZXT4 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src), "zxt4 $dst = $src;;">; - -// the following are all a bit unfortunate: we throw away the complement -// of the compare! -def CMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.eq $dst, p0 = $src1, $src2;;">; -def CMPGT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.gt $dst, p0 = $src1, $src2;;">; -def CMPGE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.ge $dst, p0 = $src1, $src2;;">; -def CMPLT : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.lt $dst, p0 = $src1, $src2;;">; -def CMPLE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.le $dst, p0 = $src1, $src2;;">; -def CMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.ne $dst, p0 = $src1, $src2;;">; -def CMPLTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.ltu $dst, p0 = $src1, $src2;;">; -def CMPGTU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.gtu $dst, p0 = $src1, $src2;;">; -def CMPLEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.leu $dst, p0 = $src1, $src2;;">; -def CMPGEU : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2), - "cmp.geu $dst, p0 = $src1, $src2;;">; +*/ 
// and we do the whole thing again for FP compares! def FCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst, FP:$src1, FP:$src2), @@ -186,8 +369,6 @@ def PCMPNE : AForm<0x03, 0x0b, (ops PR:$dst, GR:$src1, GR:$src2, PR:$qp), def BCMPEQ : AForm<0x03, 0x0b, (ops PR:$dst1, PR:$dst2, GR:$src1, GR:$src2), "cmp.eq $dst1, dst2 = $src1, $src2;;">; -def ADD : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), - "add $dst = $src1, $src2;;">; def ADDIMM14 : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, s14imm:$imm), "adds $dst = $imm, $src1;;">; @@ -205,8 +386,6 @@ def TPCMPIMM8NE : AForm<0x03, 0x0b, "($qp) cmp.ne $dst , p0 = $imm, $src2;;">; } -def SUB : AForm<0x03, 0x0b, (ops GR:$dst, GR:$src1, GR:$src2), - "sub $dst = $src1, $src2;;">; def SUBIMM8 : AForm<0x03, 0x0b, (ops GR:$dst, s8imm:$imm, GR:$src2), "sub $dst = $imm, $src2;;">; @@ -312,6 +491,8 @@ def STF8 : AForm<0x03, 0x0b, (ops GR:$dstPtr, FP:$value), "stfd [$dstPtr] = $value;;">; let isTerminator = 1, isBranch = 1 in { + def BRL_NOTCALL : RawForm<0x03, 0xb0, (ops i64imm:$dst), + "(p0) brl.cond.sptk $dst;;">; def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst), "($qp) brl.cond.sptk $dst;;">; def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst), @@ -334,8 +515,14 @@ let isCall = 1, isTerminator = 1, isBranch = 1, F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119, F120,F121,F122,F123,F124,F125,F126,F127, out0,out1,out2,out3,out4,out5,out6,out7] in { - def BRCALL : RawForm<0x03, 0xb0, (ops calltarget:$dst), +// old pattern call + def BRCALL: RawForm<0x03, 0xb0, (ops calltarget:$dst), + "br.call.sptk rp = $dst;;">; // FIXME: teach llvm about branch regs? +// new daggy stuff! + def BRCALL_IPREL : RawForm<0x03, 0xb0, (ops calltarget:$dst, variable_ops), "br.call.sptk rp = $dst;;">; // FIXME: teach llvm about branch regs? + def BRCALL_INDIRECT : RawForm<0x03, 0xb0, (ops GR:$branchreg, variable_ops), + "br.call.sptk rp = $branchreg;;">; // FIXME: teach llvm about branch regs? 
def BRLCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, i64imm:$dst), "($qp) brl.cond.call.sptk $dst;;">; def BRCOND_CALL : RawForm<0x03, 0xb0, (ops PR:$qp, GR:$dst), diff --git a/lib/Target/IA64/IA64RegisterInfo.td b/lib/Target/IA64/IA64RegisterInfo.td index d6366701939..d50f8f43ed4 100644 --- a/lib/Target/IA64/IA64RegisterInfo.td +++ b/lib/Target/IA64/IA64RegisterInfo.td @@ -211,7 +211,7 @@ def out6 : GR<6, "out6">; def out7 : GR<7, "out7">; // application (special) registers: -// " previous function state" application register +// "previous function state" application register def AR_PFS : GR<0, "ar.pfs">; // "return pointer" (this is really branch register b0) @@ -255,7 +255,7 @@ def GR : RegisterClass<"IA64", i64, 64, r104, r105, r106, r107, r108, r109, r110, r111, r112, r113, r114, r115, r116, r117, r118, r119, r120, r121, r122, r123, r124, r125, r126, r127, - r0, r1, r2, r12, r13, r15, r22]> // the last 15 are special (look down) + r0, r1, r2, r12, r13, r15, r22, rp]> // the last 16 are special (look down) { let MethodProtos = [{ iterator allocation_order_begin(MachineFunction &MF) const; @@ -264,13 +264,13 @@ def GR : RegisterClass<"IA64", i64, 64, let MethodBodies = [{ GRClass::iterator GRClass::allocation_order_begin(MachineFunction &MF) const { - // hide registers appropriately: + // hide the 8 out? 
registers appropriately: return begin()+(8-(MF.getInfo()->outRegsUsed)); } GRClass::iterator GRClass::allocation_order_end(MachineFunction &MF) const { - int numReservedRegs=7; // the 7 special registers r0,r1,r2,r12,r13 etc + int numReservedRegs=8; // the 8 special registers r0,r1,r2,r12,r13 etc // we also can't allocate registers for use as locals if they're // already required as 'out' registers diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp index 7d80e5770c4..3760f489b52 100644 --- a/lib/Target/IA64/IA64TargetMachine.cpp +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -37,6 +37,9 @@ namespace { cl::desc("Disable the IA64 asm printer, for use " "when profiling the code generator.")); + cl::opt EnableDAGIsel("enable-ia64-dag-isel", cl::Hidden, + cl::desc("Enable the IA64 DAG->DAG isel")); + // Register the target. RegisterTarget X("ia64", " IA-64 (Itanium)"); } @@ -97,8 +100,12 @@ bool IA64TargetMachine::addPassesToEmitFile(PassManager &PM, // Make sure that no unreachable blocks are instruction selected. PM.add(createUnreachableBlockEliminationPass()); - PM.add(createIA64PatternInstructionSelector(*this)); - + // Add an instruction selector + if(EnableDAGIsel) + PM.add(createIA64DAGToDAGInstructionSelector(*this)); + else + PM.add(createIA64PatternInstructionSelector(*this)); + /* XXX not yet. ;) // Run optional SSA-based machine code optimizations next... if (!NoSSAPeephole) diff --git a/lib/Target/IA64/Makefile b/lib/Target/IA64/Makefile index 8bd2b6a790f..f519cf92db6 100644 --- a/lib/Target/IA64/Makefile +++ b/lib/Target/IA64/Makefile @@ -11,7 +11,8 @@ TARGET = IA64 # Make sure that tblgen is run, first thing. BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \ IA64GenRegisterInfo.inc IA64GenInstrNames.inc \ - IA64GenInstrInfo.inc IA64GenAsmWriter.inc + IA64GenInstrInfo.inc IA64GenAsmWriter.inc \ + IA64GenDAGISel.inc include $(LEVEL)/Makefile.common -- 2.34.1